Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; @@ -107,7 +107,7 @@ /** * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * {@link ArabicStemFilter}. * * @param matchVersion @@ -131,7 +131,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link LowerCaseFilter}, {@link StopFilter}, - * {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter} + * {@link ArabicNormalizationFilter}, {@link SetKeywordMarkerFilter} * if a stem exclusion set is provided and {@link ArabicStemFilter}. */ @Override @@ -144,7 +144,7 @@ // TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?! 
result = new ArabicNormalizationFilter(result); if(!stemExclusionSet.isEmpty()) { - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); } return new TokenStreamComponents(source, new ArabicStemFilter(result)); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc @link +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -29,10 +29,10 @@ * A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.. *
* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
- * @see KeywordMarkerFilter */ + * @see SetKeywordMarkerFilter */ public final class ArabicStemFilter extends TokenFilter { private final ArabicStemmer stemmer = new ArabicStemmer(); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; @@ -97,7 +97,7 @@ /** * Builds an analyzer with the given stop words and a stem exclusion set. - * If a stem exclusion set is provided this analyzer will add a {@link KeywordMarkerFilter} + * If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter} * before {@link BulgarianStemFilter}. */ public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { @@ -114,7 +114,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link BulgarianStemFilter}. 
*/ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new BulgarianStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; @@ -131,7 +131,7 @@ result = new StandardFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(excltable != null && !excltable.isEmpty()) - result = new KeywordMarkerFilter(result, excltable); + result = new SetKeywordMarkerFilter(result, excltable); return new TokenStreamComponents(source, new BrazilianStemFilter(result)); } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import 
org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,10 +30,10 @@ * A {@link TokenFilter} that applies {@link BrazilianStemmer}. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
- * @see KeywordMarkerFilter + * @see SetKeywordMarkerFilter * */ public final class BrazilianStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -97,7 +97,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -119,7 +119,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, - * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is + * {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -131,7 +131,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new CatalanStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (working copy) @@ -52,13 +52,7 @@ @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - final char[] buffer = termAtt.buffer(); - final int length = termAtt.length(); - for (int i = 0; i < length;) { - i += Character.toChars( - Character.toLowerCase( - charUtils.codePointAt(buffer, i)), buffer, i); - } + charUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length()); return true; } else return false; Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (working copy) @@ -20,7 +20,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import 
org.apache.lucene.analysis.standard.StandardFilter; @@ -115,7 +115,7 @@ * , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If * a stem exclusion set is provided via * {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a - * {@link KeywordMarkerFilter} is added before + * {@link SetKeywordMarkerFilter} is added before * {@link CzechStemFilter}. */ @Override @@ -126,7 +126,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter( matchVersion, result, stopwords); if(!this.stemExclusionTable.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionTable); + result = new SetKeywordMarkerFilter(result, stemExclusionTable); result = new CzechStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (working copy) @@ -2,7 +2,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -29,12 +29,12 @@ * A {@link TokenFilter} that applies {@link CzechStemmer} to stem Czech words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*NOTE: Input is expected to be in lowercase, * but with diacritical marks
- * @see KeywordMarkerFilter + * @see SetKeywordMarkerFilter */ public final class CzechStemFilter extends TokenFilter { private final CzechStemmer stemmer = new CzechStemmer(); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new DanishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -129,7 +129,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from a {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided, {@link GermanNormalizationFilter} and {@link GermanLightStemFilter} */ @Override @@ -139,7 +139,7 @@ TokenStream result = new StandardFilter(matchVersion, source); result = new LowerCaseFilter(matchVersion, result); result = new StopFilter( matchVersion, result, stopwords); - result = new KeywordMarkerFilter(result, exclusionSet); + result = new SetKeywordMarkerFilter(result, exclusionSet); result = 
new GermanNormalizationFilter(result); result = new GermanLightStemFilter(result); return new TokenStreamComponents(source, result); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -34,10 +34,10 @@ * ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
- * @see KeywordMarkerFilter + * @see SetKeywordMarkerFilter */ public final class GermanStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; @@ -73,7 +73,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -96,7 +96,7 @@ * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link EnglishPossessiveFilter}, * {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link PorterStemFilter}. 
*/ @Override @@ -108,7 +108,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PorterStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * English words. *
* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -90,7 +90,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -112,7 +112,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SpanishLightStemFilter}. 
*/ @Override @@ -123,7 +123,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SpanishLightStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -89,7 +89,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -111,7 +111,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -122,7 +122,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new BasqueStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new FinnishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (working copy) @@ -20,7 +20,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -133,7 +133,7 @@ * built from a {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link ElisionFilter}, * {@link LowerCaseFilter}, {@link StopFilter}, - * {@link KeywordMarkerFilter} if a stem exclusion set is + * {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided, and {@link FrenchLightStemFilter} */ @Override @@ -145,7 +145,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!excltable.isEmpty()) - result = new KeywordMarkerFilter(result, excltable); + result = new SetKeywordMarkerFilter(result, excltable); result = new FrenchLightStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import 
org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -107,7 +107,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -129,7 +129,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link IrishLowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -144,7 +144,7 @@ result = new IrishLowerCaseFilter(result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new IrishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; @@ -89,7 +89,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -111,7 +111,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link GalicianStemFilter}. 
*/ @Override @@ -122,7 +122,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new GalicianStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Galician words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Galician words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java (working copy) @@ -20,7 +20,7 @@ import java.io.IOException; import java.io.Reader; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; @@ -111,7 +111,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from a {@link StandardTokenizer} filtered with * {@link LowerCaseFilter}, {@link IndicNormalizationFilter}, - * {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter} + * {@link HindiNormalizationFilter}, {@link SetKeywordMarkerFilter} * if a stem exclusion set is provided, {@link HindiStemFilter}, and * Hindi Stop words */ @@ -121,7 +121,7 @@ final Tokenizer source = new StandardTokenizer(matchVersion, reader); TokenStream result = new LowerCaseFilter(matchVersion, source); if (!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new IndicNormalizationFilter(result); result = new HindiNormalizationFilter(result); result = new StopFilter(matchVersion, result, stopwords); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (revision 1455119) +++ 
lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc @link +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -31,7 +31,7 @@ ** In some cases the normalization may cause unrelated terms to conflate, so * to prevent terms from being normalized use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
* @see HindiNormalizer Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new HungarianStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Hungarian words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (working copy) @@ -175,10 +175,7 @@ @SuppressWarnings("unchecked") public List* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java (working copy) @@ -22,41 +22,28 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.util.CharArraySet; /** - * Marks terms as keywords via the {@link KeywordAttribute}. Each token - * contained in the provided is marked as a keyword by setting - * {@link KeywordAttribute#setKeyword(boolean)} totrue.
+ * Marks terms as keywords via the {@link KeywordAttribute}.
*
* @see KeywordAttribute
*/
-public final class KeywordMarkerFilter extends TokenFilter {
+public abstract class KeywordMarkerFilter extends TokenFilter {
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final CharArraySet keywordSet;
/**
- * Create a new KeywordMarkerFilter, that marks the current token as a
- * keyword if the tokens term buffer is contained in the given set via the
- * {@link KeywordAttribute}.
- *
- * @param in
- * TokenStream to filter
- * @param keywordSet
- * the keywords set to lookup the current termbuffer
+ * Creates a new {@link KeywordMarkerFilter}
+ * @param in the input stream
*/
- public KeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
+ protected KeywordMarkerFilter(TokenStream in) {
super(in);
- this.keywordSet = keywordSet;
}
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) {
+ if (isKeyword()) {
keywordAttr.setKeyword(true);
}
return true;
@@ -64,4 +51,7 @@
return false;
}
}
+
+ /** Returns true if the current token should be marked as a keyword; consulted by {@link #incrementToken()} for each token. */
+ protected abstract boolean isKeyword();
}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (revision 1455119)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.io.IOException;
+import java.util.regex.Pattern;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.*;
@@ -29,23 +30,30 @@
* <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- * <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" ignoreCase="false"/>
+ * <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/>
* </analyzer>
* </fieldType>
*
*/
public class KeywordMarkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";
+ public static final String PATTERN = "pattern";
private CharArraySet protectedWords;
private boolean ignoreCase;
+ private Pattern pattern;
 @Override
 public void inform(ResourceLoader loader) throws IOException {
 String wordFiles = args.get(PROTECTED_TOKENS);
+ String stringPattern = args.get(PATTERN); // optional regex argument; only compiled when present (null-checked below)
 ignoreCase = getBoolean("ignoreCase", false);
 if (wordFiles != null) {
 protectedWords = getWordSet(loader, wordFiles, ignoreCase);
 }
+ if (stringPattern != null) { // ignoreCase governs the regex as well as the word set
+ pattern = ignoreCase ? Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : Pattern.compile(stringPattern);
+ }
+
 }
public boolean isIgnoreCase() {
@@ -54,6 +62,12 @@
 @Override
 public TokenStream create(TokenStream input) {
- return protectedWords == null ? input : new KeywordMarkerFilter(input, protectedWords);
+ if (pattern != null) { // pattern-based marker wraps the input first
+ input = new PatternKeywordMarkerFilter(input, pattern);
+ }
+ if (protectedWords != null) { // set-based marker is stacked on top, so both markers can be active together
+ input = new SetKeywordMarkerFilter(input, protectedWords);
+ }
+ return input; // the unwrapped input when neither a pattern nor a word set was configured
 }
}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (working copy)
@@ -0,0 +1,57 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * Marks terms as keywords via the {@link KeywordAttribute}. Each token
+ * that matches the provided pattern is marked as a keyword by setting
+ * {@link KeywordAttribute#setKeyword(boolean)} to true.
+ */
+public class PatternKeywordMarkerFilter extends KeywordMarkerFilter {
+  // Single matcher instance, re-targeted at the term attribute for every token.
+  private final Matcher matcher;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  /**
+   * Create a new {@link PatternKeywordMarkerFilter}, that marks the current
+   * token as a keyword if the token's term buffer matches the provided
+   * {@link Pattern} via the {@link KeywordAttribute}.
+   *
+   * @param in
+   *          TokenStream to filter
+   * @param pattern
+   *          the pattern to apply to the incoming term buffer
+   */
+  public PatternKeywordMarkerFilter(TokenStream in, Pattern pattern) {
+    super(in);
+    this.matcher = pattern.matcher("");
+  }
+
+  /** Returns true if the entire current term matches the pattern. */
+  @Override
+  protected boolean isKeyword() {
+    matcher.reset(termAtt);
+    return matcher.matches();
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (working copy)
@@ -0,0 +1,52 @@
+package org.apache.lucene.analysis.miscellaneous;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
+
+/**
+ * Marks terms as keywords via the {@link KeywordAttribute}. Each token
+ * contained in the provided set is marked as a keyword by setting
+ * {@link KeywordAttribute#setKeyword(boolean)} to true.
+ */
+public final class SetKeywordMarkerFilter extends KeywordMarkerFilter {
+ private final CharArraySet keywordSet;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ /**
+ * Create a new {@link SetKeywordMarkerFilter}, that marks the current token as a
+ * keyword if the token's term buffer is contained in the given set via the
+ * {@link KeywordAttribute}.
+ *
+ * @param in
+ * TokenStream to filter
+ * @param keywordSet
+ * the set of keywords in which the current term buffer is looked up
+ */
+ public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
+ super(in);
+ this.keywordSet = keywordSet;
+ }
+
+ @Override
+ protected boolean isKeyword() {
+ return keywordSet.contains(termAtt);
+ }
+
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (revision 1455119)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (working copy)
@@ -20,7 +20,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
@@ -129,7 +129,7 @@
*
* @return A {@link TokenStream} built from a {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
- * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is provided,
+ * {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is provided,
* {@link StemmerOverrideFilter}, and {@link SnowballFilter}
*/
@Override
@@ -140,7 +140,7 @@
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stoptable);
if (!excltable.isEmpty())
- result = new KeywordMarkerFilter(result, excltable);
+ result = new SetKeywordMarkerFilter(result, excltable);
if (!stemdict.isEmpty())
result = new StemmerOverrideFilter(matchVersion, result, stemdict);
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (revision 1455119)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (working copy)
@@ -23,7 +23,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -91,7 +91,7 @@
/**
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
- * provided this analyzer will add a {@link KeywordMarkerFilter} before
+ * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
* @param matchVersion lucene compatibility version
@@ -113,7 +113,7 @@
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
- * , {@link KeywordMarkerFilter} if a stem exclusion set is
+ * , {@link SetKeywordMarkerFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
@@ -124,7 +124,7 @@
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
- result = new KeywordMarkerFilter(result, stemExclusionSet);
+ result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new NorwegianStemmer());
return new TokenStreamComponents(source, result);
}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java (revision 1455119)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java (working copy)
@@ -21,7 +21,7 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
@@ -30,7 +30,7 @@
* words.
* * To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -90,7 +90,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -112,7 +112,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link PortugueseLightStemFilter}. 
*/ @Override @@ -123,7 +123,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PortugueseLightStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Portuguese words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Portuguese words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Portuguese words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -94,7 +94,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -116,7 +116,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -127,7 +127,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new RomanianStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (working copy) @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.IOUtils; @@ -111,7 +111,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from a {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided, and {@link SnowballFilter} */ @Override @@ -122,7 +122,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if (!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new 
org.tartarus.snowball.ext.RussianStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. ** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (working copy) @@ -20,7 +20,7 @@ import java.util.Map; import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -74,7 +74,7 @@ } if (protectedWords != null) - input = new KeywordMarkerFilter(input, protectedWords); + input = new SetKeywordMarkerFilter(input, protectedWords); return new SnowballFilter(input, program); } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. 
If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new SwedishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. 
** To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -93,7 +93,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -115,7 +115,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link TurkishLowerCaseFilter}, - * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem + * {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem * exclusion set is provided and {@link SnowballFilter}. 
*/ @Override @@ -126,7 +126,7 @@ result = new TurkishLowerCaseFilter(result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new TurkishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (working copy) @@ -131,8 +131,26 @@ } return new CharacterBuffer(new char[bufferSize], 0, 0); } - + + /** + * Converts each unicode codepoint to lowerCase via {@link Character#toLowerCase(int)} starting + * at the given offset. + * @param buffer the char buffer to lowercase + * @param offset the offset to start at + * @param limit the index (exclusive) in the buffer at which lowercasing stops + */ + public void toLowerCase(final char[] buffer, final int offset, final int limit) { + assert buffer.length >= limit; + assert 0 <= offset && offset <= buffer.length; + for (int i = offset; i < limit;) { + i += Character.toChars( + Character.toLowerCase( + codePointAt(buffer, i)), buffer, i); + } + } + + /** * Fills the {@link CharacterBuffer} with characters read from the given * reader {@link Reader}.
This method tries to read as many characters into * the {@link CharacterBuffer} as possible, each call to fill will start Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (working copy) @@ -215,12 +215,9 @@ * The user should never modify this text array after calling this method. */ public V put(char[] text, V value) { - if (ignoreCase) - for(int i=0;i* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *
*/ Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java =================================================================== --- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java (revision 1455119) +++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java (working copy) @@ -35,7 +35,7 @@ * ** In order to prevent terms from being stemmed, use an instance of - * {@link org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter} + * {@link org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter} * or a custom {@link TokenFilter} that sets the {@link KeywordAttribute} * before this {@link TokenStream}. *
Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java =================================================================== --- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java (revision 1455119) +++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase { @@ -49,7 +49,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink)); } }; Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java =================================================================== --- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java (revision 1455119) +++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; 
import org.apache.lucene.analysis.util.CharArraySet; import java.io.IOException; @@ -70,7 +70,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink)); } }; Index: lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java =================================================================== --- lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (revision 1455119) +++ lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; @@ -112,7 +112,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* * @param matchVersion lucene compatibility version @@ -135,7 +135,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link StempelFilter}. */ @Override @@ -146,7 +146,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new StempelFilter(result, new StempelStemmer(stemTable)); return new TokenStreamComponents(source, result); }