Index: src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (revision 830934) +++ src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (working copy) @@ -57,8 +57,7 @@ * 'a'. */ public final class ASCIIFoldingFilter extends TokenFilter { - public ASCIIFoldingFilter(TokenStream input) - { + public ASCIIFoldingFilter(TokenStream input) { super(input); termAtt = addAttribute(TermAttribute.class); } @@ -77,8 +76,7 @@ // just return token as-is: for(int i = 0 ; i < length ; ++i) { final char c = buffer[i]; - if (c >= '\u0080') - { + if (c >= '\u0080') { foldToASCII(buffer, length); termAtt.setTermBuffer(output, 0, outputPos); break; @@ -96,8 +94,7 @@ * @param input The string to fold * @param length The number of characters in the input string */ - public void foldToASCII(char[] input, int length) - { + public void foldToASCII(char[] input, int length) { // Worst-case length required: final int maxSizeNeeded = 4 * length; if (output.length < maxSizeNeeded) { @@ -108,7 +105,19 @@ for (int pos = 0 ; pos < length ; ++pos) { final char c = input[pos]; - + outputPos = foldToASCII(c, output, outputPos); + } + } + + /** + * Converts characters above ASCII to their ASCII equivalents. For example, + * accents are removed from accented characters. + * @param c The character to fold + * @param output The result of the folding. Should be of size >= {@code outputPos + 4}. + * @param outputPos Where to put the folding result in {@code output} + * @return new position ({@code outputPos}) in {@code output} + */ + public static final int foldToASCII(char c, char[] output, int outputPos) { // Quick test: if it's not in range then just keep current character if (c < '\u0080') { output[outputPos++] = c; @@ -2025,7 +2034,7 @@ output[outputPos++] = c; break; } - } } + return outputPos; } }