Index: solr/core/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java (revision 1144761) +++ solr/core/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java (revision ) @@ -35,7 +35,9 @@ import java.util.regex.Pattern; import java.io.IOException; +import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*; + /** * Factory for {@link WordDelimiterFilter}. *
@@ -80,38 +82,44 @@ } private CharArraySet protectedWords = null; - - int generateWordParts=0; - int generateNumberParts=0; - int catenateWords=0; - int catenateNumbers=0; - int catenateAll=0; - int splitOnCaseChange=0; - int splitOnNumerics=0; - int preserveOriginal=0; - int stemEnglishPossessive=0; + private int flags; byte[] typeTable = null; @Override public void init(Mapargs) { super.init(args); - generateWordParts = getInt("generateWordParts", 1); - generateNumberParts = getInt("generateNumberParts", 1); - catenateWords = getInt("catenateWords", 0); - catenateNumbers = getInt("catenateNumbers", 0); - catenateAll = getInt("catenateAll", 0); - splitOnCaseChange = getInt("splitOnCaseChange", 1); - splitOnNumerics = getInt("splitOnNumerics", 1); - preserveOriginal = getInt("preserveOriginal", 0); - stemEnglishPossessive = getInt("stemEnglishPossessive", 1); + if (getInt("generateWordParts", 1) != 0) { + flags |= GENERATE_WORD_PARTS; - } + } + if (getInt("generateNumberParts", 1) != 0) { + flags |= GENERATE_NUMBER_PARTS; + } + if (getInt("catenateWords", 0) != 0) { + flags |= CATENATE_WORDS; + } + if (getInt("catenateNumbers", 0) != 0) { + flags |= CATENATE_NUMBERS; + } + if (getInt("catenateAll", 0) != 0) { + flags |= CATENATE_ALL; + } + if (getInt("splitOnCaseChange", 1) != 0) { + flags |= SPLIT_ON_CASE_CHANGE; + } + if (getInt("splitOnNumerics", 1) != 0) { + flags |= SPLIT_ON_NUMERICS; + } + if (getInt("preserveOriginal", 0) != 0) { + flags |= PRESERVE_ORIGINAL; + } + if (getInt("stemEnglishPossessive", 1) != 0) { + flags |= STEM_ENGLISH_POSSESSIVE; + } + } public WordDelimiterFilter create(TokenStream input) { return new WordDelimiterFilter(input, typeTable == null ? 
WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable, - generateWordParts, generateNumberParts, - catenateWords, catenateNumbers, catenateAll, - splitOnCaseChange, preserveOriginal, - splitOnNumerics, stemEnglishPossessive, protectedWords); + flags, protectedWords); } // source => type @@ -144,17 +152,17 @@ private Byte parseType(String s) { if (s.equals("LOWER")) - return WordDelimiterFilter.LOWER; + return LOWER; else if (s.equals("UPPER")) - return WordDelimiterFilter.UPPER; + return UPPER; else if (s.equals("ALPHA")) - return WordDelimiterFilter.ALPHA; + return ALPHA; else if (s.equals("DIGIT")) - return WordDelimiterFilter.DIGIT; + return DIGIT; else if (s.equals("ALPHANUM")) - return WordDelimiterFilter.ALPHANUM; + return ALPHANUM; else if (s.equals("SUBWORD_DELIM")) - return WordDelimiterFilter.SUBWORD_DELIM; + return SUBWORD_DELIM; else return null; } Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java (revision 1040463) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java (revision ) @@ -72,53 +72,74 @@ public static final int ALPHANUM = 0x07; /** - * If true, causes parts of words to be generated: + * Causes parts of words to be generated: * * "PowerShot" => "Power" "Shot" */ - final boolean generateWordParts; + public static final int GENERATE_WORD_PARTS = 1; /** - * If true, causes number subwords to be generated: + * Causes number subwords to be generated: * * "500-42" => "500" "42" */ - final boolean generateNumberParts; + public static final int GENERATE_NUMBER_PARTS = 2; /** - * If true, causes maximum runs of word parts to be catenated: + * Causes maximum runs of word parts to be catenated: * * "wi-fi" => "wifi" */ - final boolean catenateWords; + public static final int 
CATENATE_WORDS = 4; /** - * If true, causes maximum runs of number parts to be catenated: + * Causes maximum runs of number parts to be catenated: * - * "500-42" => "50042" + * "500-42" => "50042" */ - final boolean catenateNumbers; + public static final int CATENATE_NUMBERS = 8; /** - * If true, causes all subword parts to be catenated: + * Causes all subword parts to be catenated: * * "wi-fi-4000" => "wifi4000" */ - final boolean catenateAll; + public static final int CATENATE_ALL = 16; /** - * If true, original words are preserved and added to the subword list (Defaults to false) + * Causes original words to be preserved and added to the subword list (Defaults to false) * * "500-42" => "500" "42" "500-42" */ - final boolean preserveOriginal; + public static final int PRESERVE_ORIGINAL = 32; - + /** + * If not set, causes case changes to be ignored (subwords will only be generated + * given SUBWORD_DELIM tokens) + */ + public static final int SPLIT_ON_CASE_CHANGE = 64; + + /** + * If not set, causes numeric changes to be ignored (subwords will only be generated + * given SUBWORD_DELIM tokens). 
+ */ + public static final int SPLIT_ON_NUMERICS = 128; + + /** + * Causes trailing "'s" to be removed for each subword + * + * "O'Neil's" => "O", "Neil" + */ + public static final int STEM_ENGLISH_POSSESSIVE = 256; + + /** * If not null is the set of tokens to protect from being delimited * */ final CharArraySet protWords; - + + private final int flags; + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); @@ -154,6 +175,33 @@ private boolean hasOutputFollowingOriginal = false; /** + * Creates a new WordDelimiterFilter + * + * @param in TokenStream to be filtered + * @param charTypeTable table containing character types + * @param configurationFlags Flags configuring the filter + * @param protWords If not null is the set of tokens to protect from being delimited + */ + public WordDelimiterFilter(TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) { + super(in); + this.flags = configurationFlags; + this.protWords = protWords; + this.iterator = new WordDelimiterIterator(charTypeTable, configurationFlags); + } + + /** + * Creates a new WordDelimiterFilter using {@link WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE} + * as its charTypeTable + * + * @param in TokenStream to be filtered + * @param configurationFlags Flags configuring the filter + * @param protWords If not null is the set of tokens to protect from being delimited + */ + public WordDelimiterFilter(TokenStream in, int configurationFlags, CharArraySet protWords) { + this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords); + } + + /** * @param in Token stream to be filtered. 
* @param charTypeTable * @param generateWordParts If 1, causes parts of words to be generated: "PowerShot" => "Power" "Shot" @@ -166,7 +214,9 @@ * @param splitOnNumerics 1, causes "j2se" to be three tokens; "j" "2" "se" * @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" * @param protWords If not null is the set of tokens to protect from being delimited + * @deprecated Use {@link #WordDelimiterFilter(TokenStream, byte[], int, CharArraySet)} */ + @Deprecated public WordDelimiterFilter(TokenStream in, byte[] charTypeTable, int generateWordParts, @@ -180,14 +230,38 @@ int stemEnglishPossessive, CharArraySet protWords) { super(in); - this.generateWordParts = generateWordParts != 0; - this.generateNumberParts = generateNumberParts != 0; - this.catenateWords = catenateWords != 0; - this.catenateNumbers = catenateNumbers != 0; - this.catenateAll = catenateAll != 0; - this.preserveOriginal = preserveOriginal != 0; + + int flags = 0; + if (generateWordParts != 0) { + flags |= GENERATE_WORD_PARTS; + } + if (generateNumberParts != 0) { + flags |= GENERATE_NUMBER_PARTS; + } + if (catenateWords != 0) { + flags |= CATENATE_WORDS; + } + if (catenateNumbers != 0) { + flags |= CATENATE_NUMBERS; + } + if (catenateAll != 0) { + flags |= CATENATE_ALL; + } + if (preserveOriginal != 0) { + flags |= PRESERVE_ORIGINAL; + } + if (splitOnCaseChange != 0) { + flags |= SPLIT_ON_CASE_CHANGE; + } + if (splitOnNumerics != 0) { + flags |= SPLIT_ON_NUMERICS; + } + if (stemEnglishPossessive != 0) { + flags |= STEM_ENGLISH_POSSESSIVE; + } this.protWords = protWords; - this.iterator = new WordDelimiterIterator(charTypeTable, splitOnCaseChange != 0, splitOnNumerics != 0, stemEnglishPossessive != 0); + this.iterator = new WordDelimiterIterator(charTypeTable, flags); + this.flags = flags; } /** @@ -202,7 +276,9 @@ * @param splitOnNumerics 1, causes "j2se" to be three tokens; "j" "2" "se" * @param stemEnglishPossessive If 1, causes trailing 
"'s" to be removed for each subword: "O'Neil's" => "O", "Neil" * @param protWords If not null is the set of tokens to protect from being delimited + * @deprecated Use {@link #WordDelimiterFilter(TokenStream, int, CharArraySet)} */ + @Deprecated public WordDelimiterFilter(TokenStream in, int generateWordParts, int generateNumberParts, @@ -242,7 +318,7 @@ } // word of simply delimiters - if (iterator.end == WordDelimiterIterator.DONE && !preserveOriginal) { + if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL)) { // if the posInc is 1, simply ignore it in the accumulation if (posIncAttribute.getPositionIncrement() == 1) { accumPosInc--; @@ -253,10 +329,10 @@ saveState(); hasOutputToken = false; - hasOutputFollowingOriginal = !preserveOriginal; + hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL); lastConcatCount = 0; - if (preserveOriginal) { + if (has(PRESERVE_ORIGINAL)) { posIncAttribute.setPositionIncrement(accumPosInc); accumPosInc = 0; return true; @@ -312,7 +388,7 @@ } // add all subwords (catenateAll) - if (catenateAll) { + if (has(CATENATE_ALL)) { concatenate(concatAll); } @@ -385,7 +461,7 @@ * @return {@code true} if concatenation should occur, {@code false} otherwise */ private boolean shouldConcatenate(int wordType) { - return (catenateWords && isAlpha(wordType)) || (catenateNumbers && isDigit(wordType)); + return (has(CATENATE_WORDS) && isAlpha(wordType)) || (has(CATENATE_NUMBERS) && isDigit(wordType)); } /** @@ -395,7 +471,7 @@ * @return {@code true} if a word/number part should be generated, {@code false} otherwise */ private boolean shouldGenerateParts(int wordType) { - return (generateWordParts && isAlpha(wordType)) || (generateNumberParts && isDigit(wordType)); + return (has(GENERATE_WORD_PARTS) && isAlpha(wordType)) || (has(GENERATE_NUMBER_PARTS) && isDigit(wordType)); } /** @@ -494,6 +570,16 @@ return (type & UPPER) != 0; } + /** + * Determines whether the given flag is set + * + * @param flag Flag to see if set + * @return 
{@code true} if flag is set + */ + private boolean has(int flag) { + return (flags & flag) != 0; + } + // ================================================= Inner Classes ================================================= /** Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (revision 1150091) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (revision ) @@ -36,6 +36,9 @@ import java.util.Arrays; import java.util.HashSet; +import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*; +import static org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE; + /** * New WordDelimiterFilter tests... most of the tests are in ConvertedLegacyTest * TODO: should explicitly test things like protWords and not rely on @@ -63,17 +66,17 @@ @Test public void testOffsets() throws IOException { - + int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; // test that subwords and catenated subwords have // the correct offsets. 
- WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null); + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "foo", "bar", "foobar" }, new int[] { 5, 9, 5 }, new int[] { 8, 12, 12 }); - wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 6)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null); + wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 6)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "foo", "bar", "foobar" }, @@ -82,9 +85,9 @@ } @Test - public void testOffsetChange() throws Exception - { - WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("Ÿbelkeit)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null); + public void testOffsetChange() throws Exception { + int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("Ÿbelkeit)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "Ÿbelkeit" }, @@ -93,9 +96,9 @@ } @Test - public void testOffsetChange2() throws Exception - { - WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(Ÿbelkeit", 7, 17)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null); + public void testOffsetChange2() throws Exception { + int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; + 
WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(Ÿbelkeit", 7, 17)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "Ÿbelkeit" }, @@ -104,9 +107,9 @@ } @Test - public void testOffsetChange3() throws Exception - { - WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(Ÿbelkeit", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null); + public void testOffsetChange3() throws Exception { + int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(Ÿbelkeit", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "Ÿbelkeit" }, @@ -115,9 +118,9 @@ } @Test - public void testOffsetChange4() throws Exception - { - WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null); + public void testOffsetChange4() throws Exception { + int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "foo", "bar", "foobar"}, @@ -126,8 +129,9 @@ } public void doSplit(final String input, String... 
output) throws Exception { + int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer( - new StringReader(input), MockTokenizer.KEYWORD, false), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null); + new StringReader(input), MockTokenizer.KEYWORD, false), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, output); } @@ -168,8 +172,10 @@ } public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception { + int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS; + flags |= (stemPossessive == 1) ? STEM_ENGLISH_POSSESSIVE : 0; WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer( - new StringReader(input), MockTokenizer.KEYWORD, false), 1,1,0,0,0,1,0,1,stemPossessive, null); + new StringReader(input), MockTokenizer.KEYWORD, false), flags, null); assertTokenStreamContents(wdf, output); } @@ -208,6 +214,7 @@ @Test public void testPositionIncrements() throws Exception { + final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet (Arrays.asList("NUTCH")), false); /* analyzer that uses whitespace + wdf */ @@ -216,7 +223,7 @@ public TokenStream tokenStream(String field, Reader reader) { return new WordDelimiterFilter( new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), - 1, 1, 0, 0, 1, 1, 0, 1, 1, protWords); + flags, protWords); } }; @@ -244,7 +251,7 @@ return new WordDelimiterFilter( new LargePosIncTokenFilter( new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)), - 1, 1, 0, 0, 1, 1, 0, 1, 1, protWords); + flags, protWords); } }; @@ -277,8 +284,7 @@ StopFilter filter = new 
StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET); filter.setEnablePositionIncrements(true); - return new WordDelimiterFilter(filter, - 1, 1, 0, 0, 1, 1, 0, 1, 1, protWords); + return new WordDelimiterFilter(filter, flags, protWords); } }; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java (revision 990451) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java (revision ) @@ -50,21 +50,22 @@ * If false, causes case changes to be ignored (subwords will only be generated * given SUBWORD_DELIM tokens). (Defaults to true) */ - final boolean splitOnCaseChange; +// final boolean splitOnCaseChange; /** * If false, causes numeric changes to be ignored (subwords will only be generated * given SUBWORD_DELIM tokens). (Defaults to true) */ - final boolean splitOnNumerics; +// final boolean splitOnNumerics; /** * If true, causes trailing "'s" to be removed for each subword. (Defaults to true) * * "O'Neil's" => "O", "Neil" */ - final boolean stemEnglishPossessive; +// final boolean stemEnglishPossessive; - + + private final int flags; private final byte[] charTypeTable; /** if true, need to skip over a possessive found in the last call to next() */ @@ -94,19 +95,42 @@ } /** + * Creates a new WordDelimiterIterator. 
For configuration information see + * {@link WordDelimiterFilter#SPLIT_ON_CASE_CHANGE}, {@link WordDelimiterFilter#SPLIT_ON_NUMERICS} and + * {@link WordDelimiterFilter#STEM_ENGLISH_POSSESSIVE} + * + * @param charTypeTable table containing character types + * @param flags Configuration flags + */ + WordDelimiterIterator(byte[] charTypeTable, int flags) { + this.charTypeTable = charTypeTable; + this.flags = flags; + } + + /** * Create a new WordDelimiterIterator operating with the supplied rules. * * @param charTypeTable table containing character types * @param splitOnCaseChange if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) * @param splitOnNumerics if true, causes "j2se" to be three tokens; "j" "2" "se" * @param stemEnglishPossessive if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" + * @deprecated Use {@link #WordDelimiterIterator(byte[], int)} */ + @Deprecated WordDelimiterIterator(byte[] charTypeTable, boolean splitOnCaseChange, boolean splitOnNumerics, boolean stemEnglishPossessive) { this.charTypeTable = charTypeTable; - this.splitOnCaseChange = splitOnCaseChange; - this.splitOnNumerics = splitOnNumerics; - this.stemEnglishPossessive = stemEnglishPossessive; + int flags = 0; + if (splitOnCaseChange) { + flags |= SPLIT_ON_CASE_CHANGE; - } + } + if (splitOnNumerics) { + flags |= SPLIT_ON_NUMERICS; + } + if (stemEnglishPossessive) { + flags |= STEM_ENGLISH_POSSESSIVE; + } + this.flags = flags; + } /** * Advance to the next subword in the string. @@ -200,13 +224,13 @@ return false; } - if (!splitOnCaseChange && isAlpha(lastType) && isAlpha(type)) { + if (!has(SPLIT_ON_CASE_CHANGE) && isAlpha(lastType) && isAlpha(type)) { // ALPHA->ALPHA: always ignore if case isn't considered. 
return false; } else if (isUpper(lastType) && isAlpha(type)) { // UPPER->letter: Don't split return false; - } else if (!splitOnNumerics && ((isAlpha(lastType) && isDigit(type)) || (isDigit(lastType) && isAlpha(type)))) { + } else if (!has(SPLIT_ON_NUMERICS) && ((isAlpha(lastType) && isDigit(type)) || (isDigit(lastType) && isAlpha(type)))) { // ALPHA->NUMERIC, NUMERIC->ALPHA :Don't split return false; } @@ -253,7 +277,7 @@ * @return {@code true} if the text at the position indicates an English posessive, {@code false} otherwise */ private boolean endsWithPossessive(int pos) { - return (stemEnglishPossessive && + return (has(STEM_ENGLISH_POSSESSIVE) && pos > 2 && text[pos - 2] == '\'' && (text[pos - 1] == 's' || text[pos - 1] == 'S') && @@ -323,4 +347,14 @@ default: return SUBWORD_DELIM; } } + + /** + * Determines whether the given flag is set + * + * @param flag Flag to see if set + * @return {@code true} if flag is set + */ + private boolean has(int flag) { + return (flags & flag) != 0; -} \ No newline at end of file + } +} \ No newline at end of file