Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberCharFilter.java
===================================================================
--- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberCharFilter.java (revision 0)
+++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberCharFilter.java (revision 0)
@@ -0,0 +1,228 @@
+package org.apache.lucene.analysis.ja;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+/**
+ * Tests for {@link JapaneseNumberCharFilter}.
+ */
+public class TestJapaneseNumberCharFilter extends BaseTokenStreamTestCase {
+
+  /**
+   * Analyzer that applies {@link JapaneseNumberCharFilter} to the input and then
+   * tokenizes with {@code MockTokenizer.KEYWORD}.
+   */
+  private final Analyzer keywordAnalyzer = new Analyzer() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer keyword = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+      return new TokenStreamComponents(keyword, keyword);
+    }
+
+    @Override
+    protected Reader initReader(String fieldName, Reader reader) {
+      return new JapaneseNumberCharFilter(reader);
+    }
+  };
+
+  /**
+   * Analyzer that applies {@link JapaneseNumberCharFilter} to the input and then
+   * segments with {@link JapaneseTokenizer} in normal mode.
+   */
+  private final Analyzer japaneseAnalyzer = new Analyzer() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer japanese = new JapaneseTokenizer(reader, null, false, JapaneseTokenizer.Mode.NORMAL);
+      return new TokenStreamComponents(japanese, japanese);
+    }
+
+    @Override
+    protected Reader initReader(String fieldName, Reader reader) {
+      return new JapaneseNumberCharFilter(reader);
+    }
+  };
+
+  /**
+   * Asserts, input by input and in the given order, that the Japanese analyzer
+   * produces exactly one token equal to {@code expected}.
+   *
+   * @param expected the single expected token
+   * @param inputs inputs to analyze
+   * @throws IOException on IO error
+   */
+  private void assertEachNormalizesTo(String expected, String... inputs) throws IOException {
+    for (String input : inputs) {
+      assertAnalyzesTo(japaneseAnalyzer, input, new String[]{expected});
+    }
+  }
+
+  @Test
+  public void testSimple() throws IOException {
+    // Amount written with kanji only
+    String kanjiAmount = "本日十万二千五百円のワインを買った";
+    assertAnalyzesTo(keywordAnalyzer, kanjiAmount, new String[]{"本日102500円のワインを買った"});
+    assertAnalyzesTo(japaneseAnalyzer, kanjiAmount,
+        new String[]{"本日", "102500", "円", "の", "ワイン", "を", "買っ", "た"});
+
+    // Amount written with Arabic digits followed by a kanji exponent
+    String mixedAmount = "アティリカ株式会社の資本金は600万円です";
+    assertAnalyzesTo(keywordAnalyzer, mixedAmount, new String[]{"アティリカ株式会社の資本金は6000000円です"});
+    assertAnalyzesTo(japaneseAnalyzer, mixedAmount,
+        new String[]{"アティリカ", "株式会社", "の", "資本", "金", "は", "6000000", "円", "です"});
+  }
+
+  @Test
+  public void testVariants() throws IOException {
+    // Variants of three
+    // NOTE(review): several inputs below are repeated verbatim; presumably one of each pair
+    // was meant to use full-width digits - confirm against the original patch
+    assertEachNormalizesTo("3", "3", "3", "三");
+
+    // Variants of three with leading zeros
+    assertEachNormalizesTo("3", "03", "03", "〇三", "003", "003", "〇〇三");
+
+    // Variants of one thousand ("10百" is strange, but supported)
+    assertEachNormalizesTo("1000", "千", "1千", "1千", "一千", "一〇〇〇", "10百");
+  }
+
+  @Test
+  public void testLargeVariants() throws IOException {
+    // Large numbers as {input, expected token} pairs
+    String[][] largeNumbers = {
+        {"三五七八九", "35789"},
+        {"六百二万五千一", "6025001"},
+        {"兆六百万五千一", "1000006005001"},
+        {"十兆六百万五千一", "10000006005001"},
+        {"一京一", "10000000000000001"},
+        {"十京十", "100000000000000010"},
+        {"垓京兆億万千百十一", "100010001000100011111"},
+    };
+
+    for (String[] pair : largeNumbers) {
+      assertAnalyzesTo(japaneseAnalyzer, pair[0], new String[]{pair[1]});
+    }
+  }
+
+  @Test
+  public void testMixed() throws IOException {
+    // Test mixed numbers
+    assertAnalyzesTo(japaneseAnalyzer, "三千2百2十三", new String[]{"3223"});
+    assertAnalyzesTo(japaneseAnalyzer, "32二三", new String[]{"3223"});
+  }
+
+  @Test
+  public void testOdd() throws IOException {
+    // Test some oddities for inconsistent input
+    assertAnalyzesTo(japaneseAnalyzer, "十十", new String[]{"20"}); // 100?
+    assertAnalyzesTo(japaneseAnalyzer, "百百百", new String[]{"300"}); // 10,000?
+    assertAnalyzesTo(japaneseAnalyzer, "千千千千", new String[]{"4000"}); // 1,000,000,000,000?
+  }
+
+  @Test
+  public void testKanjiArabic() throws IOException {
+    // Test kanji numerals used as Arabic numbers (with leading zero)
+    String input = "〇一二三四五六七八九九八七六五四三二一〇";
+    analyzesToWithSource(keywordAnalyzer, input,
+        new String[]{"1234567899876543210"},
+        new String[]{"〇一二三四五六七八九九八七六五四三二一〇"}
+    );
+
+    // Bond, James "normalized" Bond...
+    analyzesToWithSource(keywordAnalyzer, "〇〇七", new String[]{"7"}, new String[]{"〇〇七"});
+  }
+
+  @Test
+  public void testEmpty() throws IOException {
+    // Test empty
+    String input = "";
+    analyzesToWithSource(japaneseAnalyzer, input, new String[]{}, new String[]{});
+  }
+
+  @Test
+  public void testOffsets() throws IOException {
+    // Test cumulative offsets by doing reducing and expanding normalizations, and validating against source term
+    String input = "1万。五千三百五十一。1億。五千三百五十一。五千三百五十一。千。";
+    analyzesToWithSource(japaneseAnalyzer, input,
+        new String[]{"10000", "。", "5351", "。", "100000000", "。", "5351", "。", "5351", "。", "1000", "。"},
+        new String[]{"1万", "。", "五千三百五十一", "。", "1億", "。", "五千三百五十一", "。", "五千三百五十一", "。", "千", "。"}
+    );
+  }
+
+  @Test
+  public void testZeros() throws IOException {
+    // Test zero variants
+    String input = "0。00。000。0。00。000。〇。〇〇。〇〇〇。";
+    analyzesToWithSource(japaneseAnalyzer, input,
+        new String[]{"0", "。", "0", "。", "0", "。", "0", "。", "0", "。", "0", "。", "0", "。", "0", "。", "0", "。"},
+        new String[]{"0", "。", "00", "。", "000", "。", "0", "。", "00", "。", "000", "。", "〇", "。", "〇〇", "。", "〇〇〇", "。"}
+    );
+  }
+
+  @Test
+  public void testRandomBlurbs() throws IOException {
+    // Test random blurb input that ends with a number (found by random blasting)
+    String input = "\u2e9c\u2e83\u2eac\u2eac\u2eaa r ard|r{万";
+    analyzesToWithSource(keywordAnalyzer, input,
+        new String[]{"\u2e9c\u2e83\u2eac\u2eac\u2eaa r ard|r{10000"},
+        new String[]{"\u2e9c\u2e83\u2eac\u2eac\u2eaa r ard|r{万"}
+    );
+
+    // Test zero-prefixed numbers (found by random blasting)
+    input = "0663/2001 -2001/0288.";
+    analyzesToWithSource(japaneseAnalyzer, input,
+        new String[]{"663", "/", "2001", " ", "-", "2001", "/", "288", "."},
+        new String[]{"0663", "/", "2001", " ", "-", "2001", "/", "0288", "."}
+    );
+  }
+
+  @Test
+  public void testRandomStrings() throws Exception {
+    // Blast some random strings through; raise the count (e.g. 100000) for a more thorough local run
+    checkRandomData(random(), keywordAnalyzer, 1000 * RANDOM_MULTIPLIER);
+  }
+
+  @Test
+  public void testRandomHugeStrings() throws Exception {
+    // Blast some huge random strings through; raise the count (e.g. 10000) for a more thorough local run
+    checkRandomData(random(), keywordAnalyzer, 100 * RANDOM_MULTIPLIER, 8192);
+  }
+
+  /**
+   * Test character normalization and source forms after analysis/segmentation.
+   * <p>
+   * For every produced token, its term text is compared with the expected normalized term,
+   * and the slice of {@code input} delimited by the token's start and end offsets is
+   * compared with the expected source term.
+   *
+   * @param analyzer analyzer to use
+   * @param input input text
+   * @param normalizedTerms array of normalized terms after analysis
+   * @param sourceTerms array of source terms
+   * @throws IOException on IO error
+   */
+  private void analyzesToWithSource(Analyzer analyzer, String input, String[] normalizedTerms, String[] sourceTerms)
+      throws IOException {
+    // NOTE: Would it be useful to have something like this in BaseTokenStreamTestCase to test source forms instead of positions?
+
+    // Validate the expectations before opening the stream so a bad test fixture cannot leak it
+    assertEquals("Mismatching token array lengths", normalizedTerms.length, sourceTerms.length);
+
+    TokenStream tokenStream = analyzer.tokenStream("dummy", new StringReader(input));
+    try {
+      OffsetAttribute offset = tokenStream.addAttribute(OffsetAttribute.class);
+      CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
+
+      tokenStream.reset();
+
+      for (int i = 0; i < normalizedTerms.length; i++) {
+        // Without this check a short stream would be compared against stale attribute values
+        assertTrue("Missing token at index " + i, tokenStream.incrementToken());
+
+        String sourceForm = input.substring(offset.startOffset(), offset.endOffset());
+        assertEquals("Unexpected normalized token", normalizedTerms[i], term.toString());
+        assertEquals("Unexpected source form token", sourceTerms[i], sourceForm);
+      }
+      assertFalse("Unexpected token at the end of stream", tokenStream.incrementToken());
+
+      tokenStream.end();
+    } finally {
+      // Close even when an assertion fails
+      tokenStream.close();
+    }
+  }
+}
+

Property changes on: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberCharFilter.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberCharFilter.java
===================================================================
--- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberCharFilter.java (revision 0)
+++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberCharFilter.java (revision 0)
@@ -0,0 +1,522 @@
+package org.apache.lucene.analysis.ja;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.charfilter.BaseCharFilter;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.math.BigInteger;
+import java.util.Arrays;
+
+/**
+ * Normalizes Japanese numbers.
+ * <p>
+ * Runs of Arabic digits, kanji numerals (〇 to 九) and kanji exponents (十, 百, 千, 万, 億, 兆, 京, 垓)
+ * are replaced by their decimal value; e.g. 十万二千五百 becomes 102500. The whole input is buffered
+ * and normalized on the first read.
+ */
+public class JapaneseNumberCharFilter extends BaseCharFilter {
+
+  /** Marker stored in {@link #numerals} for characters that are not a kanji numeral */
+  private static final char NO_NUMERAL = Character.MAX_VALUE;
+
+  /** Kanji numeral value (0 to 9) indexed by character, or {@link #NO_NUMERAL} */
+  private static final char[] numerals = new char[0x10000];
+
+  /** Power of ten indexed by kanji exponent character, or 0 if the character is not an exponent */
+  private static final char[] exponents = new char[0x10000];
+
+  /** Reader over the normalized input; null until the first read */
+  private Reader normalizedInput;
+
+  static {
+    // Every character is "not a numeral" unless it is explicitly mapped below
+    Arrays.fill(numerals, NO_NUMERAL);
+    numerals['〇'] = 0; // 〇 U+3007 0
+    numerals['一'] = 1; // 一 U+4E00 1
+    numerals['二'] = 2; // 二 U+4E8C 2
+    numerals['三'] = 3; // 三 U+4E09 3
+    numerals['四'] = 4; // 四 U+56DB 4
+    numerals['五'] = 5; // 五 U+4E94 5
+    numerals['六'] = 6; // 六 U+516D 6
+    numerals['七'] = 7; // 七 U+4E03 7
+    numerals['八'] = 8; // 八 U+516B 8
+    numerals['九'] = 9; // 九 U+4E5D 9
+
+    // A freshly allocated char[] is zero-filled, so only the actual exponents need to be set
+    exponents['十'] = 1; // 十 U+5341 10
+    exponents['百'] = 2; // 百 U+767E 100
+    exponents['千'] = 3; // 千 U+5343 1,000
+
+    exponents['万'] = 4; // 万 U+4E07 10,000
+    exponents['億'] = 8; // 億 U+5104 100,000,000
+    exponents['兆'] = 12; // 兆 U+5146 1,000,000,000,000
+    exponents['京'] = 16; // 京 U+4EAC 10,000,000,000,000,000
+    exponents['垓'] = 20; // 垓 U+5793 100,000,000,000,000,000,000
+  }
+
+  /**
+   * Creates a filter that normalizes Japanese numbers read from the given input
+   *
+   * @param in input
+   */
+  public JapaneseNumberCharFilter(Reader in) {
+    super(in);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int read(char[] cbuf, int off, int len) throws IOException {
+    if (normalizedInput == null) {
+      // TODO: rewrite to not buffer input
+      fill();
+    }
+    return normalizedInput.read(cbuf, off, len);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int read() throws IOException {
+    if (normalizedInput == null) {
+      // TODO: rewrite to not buffer input
+      fill();
+    }
+    return normalizedInput.read();
+  }
+
+  /**
+   * Reads the complete wrapped input into memory on the first read, normalizes it, and
+   * installs a reader over the normalized text that serves all subsequent reads
+   *
+   * @throws IOException if reading the wrapped input fails
+   */
+  private void fill() throws IOException {
+    StringBuilder inputBuffer = new StringBuilder();
+    char[] tempBuffer = new char[1024];
+
+    // Reader.read(char[]) reports end of stream as -1; stopping on any other value would truncate the input
+    for (int readCount = in.read(tempBuffer); readCount != -1; readCount = in.read(tempBuffer)) {
+      inputBuffer.append(tempBuffer, 0, readCount);
+    }
+
+    // Do normalization and wrap result in a new reader we'll use from now on
+    normalizedInput = new StringReader(normalize(inputBuffer.toString()));
+  }
+
+  /**
+   * Normalize all Japanese numbers found in input.
+   * <p>
+   * Each maximal run of numeral characters (see {@link #isNumeral(char)}) is replaced by the
+   * result of {@link #normalizeNumber(String)}; all other characters are copied unchanged.
+   * Offset corrections are recorded for every replacement whose length differs from its source.
+   *
+   * @param input input to normalize
+   * @return normalized input
+   */
+  public String normalize(String input) {
+    StringBuilder normalized = new StringBuilder();
+    int numberStartIndex = -1;
+    boolean foundNumber = false;
+
+    int cumulativeOffsetCorrections = 0;
+
+    for (int i = 0; i < input.length(); i++) {
+
+      char c = input.charAt(i);
+
+      if (isNumeral(c)) {
+        if (!foundNumber) {
+          numberStartIndex = i;
+          foundNumber = true;
+        }
+      } else {
+        if (foundNumber) {
+          // A number ends at index i
+          foundNumber = false;
+
+          // Extract number and normalize it
+          String number = input.substring(numberStartIndex, i);
+          String normalizedNumber = normalizeNumber(number);
+
+          normalized.append(normalizedNumber);
+          cumulativeOffsetCorrections = correctOffset(i, cumulativeOffsetCorrections, number, normalizedNumber);
+        }
+
+        // Append non-numeric character
+        normalized.append(c);
+      }
+    }
+
+    // Handle a number that runs up to the end of input
+    if (foundNumber) {
+      String number = input.substring(numberStartIndex, input.length());
+      String normalizedNumber = normalizeNumber(number);
+      normalized.append(normalizedNumber);
+      correctOffset(input.length(), cumulativeOffsetCorrections, number, normalizedNumber);
+    }
+
+    return normalized.toString();
+  }
+
+  /**
+   * Records offset corrections for one replaced number whose normalized form has a different
+   * length than its source form. Nothing is recorded when both lengths are equal.
+   *
+   * @param index index in the original input just past the end of the number
+   * @param cumulative offset correction accumulated by the replacements made so far
+   * @param number number as it appears in the input
+   * @param normalizedNumber number as it appears in the output
+   * @return updated cumulative offset correction
+   */
+  private int correctOffset(int index, int cumulative, String number, String normalizedNumber) {
+    int numberLength = number.length();
+    int normalizedNumberLength = normalizedNumber.length();
+
+    // Only correct offsets for new output if sizes differ
+    if (numberLength != normalizedNumberLength) {
+      if (normalizedNumberLength > numberLength) {
+        // Output is longer - we're adding characters
+        int replacementDifference = normalizedNumberLength - numberLength;
+
+        int startIndex = index - cumulative;
+        for (int j = 0; j < replacementDifference; j++) {
+          int outputIndex = startIndex + j;
+          addOffCorrectMap(outputIndex, --cumulative);
+        }
+      } else {
+        // Output is shorter -- we're removing characters
+        int replacementDifference = numberLength - normalizedNumberLength;
+        cumulative += replacementDifference;
+        int outputIndex = index - cumulative;
+        addOffCorrectMap(outputIndex, cumulative);
+      }
+    }
+
+    return cumulative;
+  }
+
+  /**
+   * Normalizes a Japanese number
+   *
+   * @param number number to normalize
+   * @return normalized number, or the unchanged input if it could not be parsed (no op)
+   */
+  public String normalizeNumber(String number) {
+    try {
+      BigInteger normalizedNumber = parseNumber(new NumberBuffer(number));
+      if (normalizedNumber == null) {
+        return number;
+      }
+      return normalizedNumber.toString();
+
+    } catch (NumberFormatException nfe) {
+      // parseBasicNumber() shouldn't throw this, but we catch it as a precaution
+      return number;
+    }
+  }
+
+  /**
+   * Parses a Japanese number as the sum of consecutive large pairs
+   *
+   * @param buffer buffer to parse
+   * @return parsed number, or null on error or end of input
+   */
+  private BigInteger parseNumber(NumberBuffer buffer) {
+    BigInteger sum = BigInteger.ZERO;
+    BigInteger result = parseLargePair(buffer);
+
+    if (result == null) {
+      return null;
+    }
+
+    while (result != null) {
+      sum = sum.add(result);
+      result = parseLargePair(buffer);
+    }
+
+    return sum;
+  }
+
+  /**
+   * Parses a pair of large numbers, i.e. large kanji factor is 10,000(万)or larger
+   *
+   * @param buffer buffer to parse
+   * @return parsed pair, or null on error or end of input
+   */
+  private BigInteger parseLargePair(NumberBuffer buffer) {
+    BigInteger first = parseMediumNumber(buffer);
+    BigInteger second = parseLargeKanjiNumeral(buffer);
+
+    if (first == null && second == null) {
+      return null;
+    }
+
+    if (second == null) {
+      // If there's no second factor, we return the first one
+      // This can happen if our number is smaller than 10,000 (万)
+      return first;
+    }
+
+    if (first == null) {
+      // If there's no first factor, just return the second one,
+      // which is the same as multiplying by 1, i.e. with 万
+      return second;
+    }
+
+    return first.multiply(second);
+  }
+
+  /**
+   * Parses a "medium sized" number, typically less than 10,000(万), but might be larger
+   * due to a larger factor from {@link #parseBasicNumber(NumberBuffer)}.
+   *
+   * @param buffer buffer to parse
+   * @return parsed number, or null on error or end of input
+   */
+  private BigInteger parseMediumNumber(NumberBuffer buffer) {
+    BigInteger sum = BigInteger.ZERO;
+    BigInteger result = parseMediumPair(buffer);
+
+    if (result == null) {
+      return null;
+    }
+
+    while (result != null) {
+      sum = sum.add(result);
+      result = parseMediumPair(buffer);
+    }
+
+    return sum;
+  }
+
+  /**
+   * Parses a pair of "medium sized" numbers, i.e. large kanji factor is at most 1,000(千)
+   *
+   * @param buffer buffer to parse
+   * @return parsed pair, or null on error or end of input
+   */
+  private BigInteger parseMediumPair(NumberBuffer buffer) {
+
+    BigInteger first = parseBasicNumber(buffer);
+    BigInteger second = parseMediumKanjiNumeral(buffer);
+
+    if (first == null && second == null) {
+      return null;
+    }
+
+    if (second == null) {
+      // If there's no second factor, we return the first one
+      // This can happen if we just have a plain number such as 五
+      return first;
+    }
+
+    if (first == null) {
+      // If there's no first factor, just return the second one,
+      // which is the same as multiplying by 1, i.e. with 千
+      return second;
+    }
+
+    // Return factors multiplied
+    return first.multiply(second);
+  }
+
+  /**
+   * Parse a basic number, which is a sequence of Arabic numbers or a sequence of 0-9 kanji numerals (〇 to 九).
+   *
+   * @param buffer buffer to parse
+   * @return parsed number, or null on error or end of input
+   */
+  private BigInteger parseBasicNumber(NumberBuffer buffer) {
+    StringBuilder builder = new StringBuilder();
+    int i = buffer.position();
+
+    while (i < buffer.length()) {
+      char c = buffer.charAt(i);
+
+      if (isArabicNumeral(c)) {
+        // Arabic numerals; half-width 0 to 9 or full-width U+FF10 to U+FF19
+        builder.append(arabicNumeralValue(c));
+      } else if (isKanjiNumeral(c)) {
+        // Kanji numerals; 〇, 一, 二, 三, 四, 五, 六, 七, 八, or 九
+        builder.append(kanjiNumeralValue(c));
+      } else {
+        // We don't have an Arabic nor a kanji numeral, so we'll stop.
+        // Our buffer will only be advanced if we have read a numeral.
+        break;
+      }
+
+      i++;
+      buffer.advance();
+    }
+
+    if (builder.length() == 0) {
+      // We didn't build anything, so we don't have a number
+      return null;
+    }
+
+    return new BigInteger(builder.toString());
+  }
+
+  /**
+   * Parse large kanji numerals (ten thousands or larger)
+   *
+   * @param buffer buffer to parse
+   * @return parsed number, or null on error or end of input
+   */
+  public BigInteger parseLargeKanjiNumeral(NumberBuffer buffer) {
+    int i = buffer.position();
+
+    if (i >= buffer.length()) {
+      return null;
+    }
+
+    char c = buffer.charAt(i);
+    int power = exponents[c];
+
+    if (power > 3) {
+      buffer.advance();
+      return BigInteger.TEN.pow(power);
+    }
+
+    return null;
+  }
+
+  /**
+   * Parse medium kanji numerals (tens, hundreds or thousands)
+   *
+   * @param buffer buffer to parse
+   * @return parsed number, or null on error or end of input
+   */
+  public BigInteger parseMediumKanjiNumeral(NumberBuffer buffer) {
+    int i = buffer.position();
+
+    if (i >= buffer.length()) {
+      return null;
+    }
+
+    char c = buffer.charAt(i);
+    int power = exponents[c];
+
+    if (1 <= power && power <= 3) {
+      buffer.advance();
+      return BigInteger.TEN.pow(power);
+    }
+
+    return null;
+  }
+
+  /**
+   * Numeral predicate
+   *
+   * @param c character to test
+   * @return true if and only if c is an Arabic numeral, a kanji numeral or a kanji exponent
+   */
+  public boolean isNumeral(char c) {
+    return isArabicNumeral(c) || isKanjiNumeral(c) || exponents[c] > 0;
+  }
+
+  /**
+   * Arabic numeral predicate. Both half-width and full-width characters are supported
+   *
+   * @param c character to test
+   * @return true if and only if c is an Arabic numeral
+   */
+  public boolean isArabicNumeral(char c) {
+    return isHalfWidthArabicNumeral(c) || isFullWidthArabicNumeral(c);
+  }
+
+  /**
+   * Arabic half-width numeral predicate
+   *
+   * @param c character to test
+   * @return true if and only if c is a half-width Arabic numeral
+   */
+  private boolean isHalfWidthArabicNumeral(char c) {
+    // 0 U+0030 - 9 U+0039
+    return '0' <= c && c <= '9';
+  }
+
+  /**
+   * Arabic full-width numeral predicate
+   *
+   * @param c character to test
+   * @return true if and only if c is a full-width Arabic numeral
+   */
+  private boolean isFullWidthArabicNumeral(char c) {
+    // FULLWIDTH DIGIT ZERO U+FF10 - FULLWIDTH DIGIT NINE U+FF19.
+    // Written as escapes so the range cannot be folded into the half-width digits by a re-encoding
+    return '\uFF10' <= c && c <= '\uFF19';
+  }
+
+  /**
+   * Returns the numeric value for the specified Arabic numeral character.
+   * Behavior is undefined if a non-Arabic numeral is provided
+   *
+   * @param c half-width or full-width Arabic numeral character
+   * @return numeral value
+   */
+  private int arabicNumeralValue(char c) {
+    // Subtract the zero digit of the range the character belongs to
+    int zero = isHalfWidthArabicNumeral(c) ? '0' : '\uFF10';
+    return c - zero;
+  }
+
+  /**
+   * Kanji numeral predicate that tests if the provided character is one of 〇, 一, 二, 三, 四, 五, 六, 七, 八, or 九.
+   * Larger number kanji gives a false value.
+   *
+   * @param c character to test
+   * @return true if and only if character is one of 〇, 一, 二, 三, 四, 五, 六, 七, 八, or 九 (0 to 9)
+   */
+  private boolean isKanjiNumeral(char c) {
+    return numerals[c] != NO_NUMERAL;
+  }
+
+  /**
+   * Returns the value for the provided kanji numeral. Only numeric values for the characters where
+   * {@link #isKanjiNumeral(char)} returns true are supported - behavior is undefined for other characters.
+   *
+   * @param c kanji numeral character
+   * @return numeral value
+   * @see #isKanjiNumeral(char)
+   */
+  private int kanjiNumeralValue(char c) {
+    return numerals[c];
+  }
+
+  /**
+   * Buffer that holds a Japanese number and a position pointer/index
+   */
+  public static class NumberBuffer {
+
+    /** Index of the next character to parse */
+    private int position;
+
+    /** The number being parsed */
+    private final String string;
+
+    /**
+     * Creates a buffer positioned at the start of the given number
+     *
+     * @param string number to parse
+     */
+    public NumberBuffer(String string) {
+      this.string = string;
+      this.position = 0;
+    }
+
+    /**
+     * @param index absolute index into the number, independent of the current position
+     * @return character at the given index
+     */
+    public char charAt(int index) {
+      return string.charAt(index);
+    }
+
+    /**
+     * @return total length of the number
+     */
+    public int length() {
+      return string.length();
+    }
+
+    /**
+     * Moves the position one character forward
+     */
+    public void advance() {
+      position++;
+    }
+
+    /**
+     * @return current position
+     */
+    public int position() {
+      return position;
+    }
+  }
+}
\ No newline at end of file

Property changes on: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberCharFilter.java
___________________________________________________________________
Added: svn:eol-style
   + native