Index: src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (revision 776655) +++ src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (working copy) @@ -1,5 +1,8 @@ package org.apache.lucene.analysis; +import java.io.IOException; + +import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.ArrayUtil; /** @@ -57,11 +60,35 @@ public ASCIIFoldingFilter(TokenStream input) { super(input); + termAtt = (TermAttribute) addAttribute(TermAttribute.class); } private char[] output = new char[512]; private int outputPos; + private TermAttribute termAtt; + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + final char[] buffer = termAtt.termBuffer(); + final int length = termAtt.termLength(); + + // If no characters actually require rewriting then we + // just return token as-is: + for(int i = 0 ; i < length ; ++i) { + final char c = buffer[i]; + if (c >= '\u0080') + { + foldToASCII(buffer, length); + termAtt.setTermBuffer(output, 0, outputPos); + break; + } + } + return true; + } else { + return false; + } + } + public Token next(Token result) throws java.io.IOException { Index: src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java =================================================================== --- src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (revision 776655) +++ src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.LuceneTestCase; import java.io.StringReader; @@ -34,84 +35,84 @@ +" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl")); ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); - final Token reusableToken = new Token(); + TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class); - assertEquals("Des", filter.next(reusableToken).term()); - assertEquals("mot", filter.next(reusableToken).term()); - assertEquals("cles", filter.next(reusableToken).term()); - assertEquals("A", filter.next(reusableToken).term()); - assertEquals("LA", filter.next(reusableToken).term()); - assertEquals("CHAINE", filter.next(reusableToken).term()); - assertEquals("A", filter.next(reusableToken).term()); - assertEquals("A", filter.next(reusableToken).term()); - assertEquals("A", filter.next(reusableToken).term()); - assertEquals("A", filter.next(reusableToken).term()); - assertEquals("A", filter.next(reusableToken).term()); - assertEquals("A", filter.next(reusableToken).term()); - assertEquals("AE", filter.next(reusableToken).term()); - assertEquals("C", filter.next(reusableToken).term()); - assertEquals("E", filter.next(reusableToken).term()); - assertEquals("E", filter.next(reusableToken).term()); - assertEquals("E", filter.next(reusableToken).term()); - assertEquals("E", filter.next(reusableToken).term()); - assertEquals("I", filter.next(reusableToken).term()); - assertEquals("I", filter.next(reusableToken).term()); - assertEquals("I", filter.next(reusableToken).term()); - assertEquals("I", filter.next(reusableToken).term()); - assertEquals("IJ", filter.next(reusableToken).term()); - assertEquals("D", filter.next(reusableToken).term()); - assertEquals("N", filter.next(reusableToken).term()); - assertEquals("O", filter.next(reusableToken).term()); - assertEquals("O", filter.next(reusableToken).term()); - assertEquals("O", filter.next(reusableToken).term()); - assertEquals("O", filter.next(reusableToken).term()); - assertEquals("O", filter.next(reusableToken).term()); - assertEquals("O", filter.next(reusableToken).term()); - assertEquals("OE", filter.next(reusableToken).term()); - assertEquals("TH", filter.next(reusableToken).term()); - assertEquals("U", filter.next(reusableToken).term()); - assertEquals("U", filter.next(reusableToken).term()); - assertEquals("U", filter.next(reusableToken).term()); - assertEquals("U", filter.next(reusableToken).term()); - assertEquals("Y", filter.next(reusableToken).term()); - assertEquals("Y", filter.next(reusableToken).term()); - assertEquals("a", filter.next(reusableToken).term()); - assertEquals("a", filter.next(reusableToken).term()); - assertEquals("a", filter.next(reusableToken).term()); - assertEquals("a", filter.next(reusableToken).term()); - assertEquals("a", filter.next(reusableToken).term()); - assertEquals("a", filter.next(reusableToken).term()); - assertEquals("ae", filter.next(reusableToken).term()); - assertEquals("c", filter.next(reusableToken).term()); - assertEquals("e", filter.next(reusableToken).term()); - assertEquals("e", filter.next(reusableToken).term()); - assertEquals("e", filter.next(reusableToken).term()); - assertEquals("e", filter.next(reusableToken).term()); - assertEquals("i", filter.next(reusableToken).term()); - assertEquals("i", filter.next(reusableToken).term()); - assertEquals("i", filter.next(reusableToken).term()); - assertEquals("i", filter.next(reusableToken).term()); - assertEquals("ij", filter.next(reusableToken).term()); - assertEquals("d", filter.next(reusableToken).term()); - assertEquals("n", filter.next(reusableToken).term()); - assertEquals("o", filter.next(reusableToken).term()); - assertEquals("o", filter.next(reusableToken).term()); - assertEquals("o", filter.next(reusableToken).term()); - assertEquals("o", filter.next(reusableToken).term()); - assertEquals("o", filter.next(reusableToken).term()); - assertEquals("o", filter.next(reusableToken).term()); - assertEquals("oe", filter.next(reusableToken).term()); - assertEquals("ss", filter.next(reusableToken).term()); - assertEquals("th", filter.next(reusableToken).term()); - assertEquals("u", filter.next(reusableToken).term()); - assertEquals("u", filter.next(reusableToken).term()); - assertEquals("u", filter.next(reusableToken).term()); - assertEquals("u", filter.next(reusableToken).term()); - assertEquals("y", filter.next(reusableToken).term()); - assertEquals("y", filter.next(reusableToken).term()); - assertEquals("fi", filter.next(reusableToken).term()); - assertEquals("fl", filter.next(reusableToken).term()); - assertNull(filter.next(reusableToken)); + assertTermEquals("Des", filter, termAtt); + assertTermEquals("mot", filter, termAtt); + assertTermEquals("cles", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("LA", filter, termAtt); + assertTermEquals("CHAINE", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("AE", filter, termAtt); + assertTermEquals("C", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("IJ", filter, termAtt); + assertTermEquals("D", filter, termAtt); + assertTermEquals("N", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("OE", filter, termAtt); + assertTermEquals("TH", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("Y", filter, termAtt); + assertTermEquals("Y", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("ae", filter, termAtt); + assertTermEquals("c", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("ij", filter, termAtt); + assertTermEquals("d", filter, termAtt); + assertTermEquals("n", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("oe", filter, termAtt); + assertTermEquals("ss", filter, termAtt); + assertTermEquals("th", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("y", filter, termAtt); + assertTermEquals("y", filter, termAtt); + assertTermEquals("fi", filter, termAtt); + assertTermEquals("fl", filter, termAtt); + assertFalse(filter.incrementToken()); } @@ -1891,11 +1892,16 @@ TokenStream stream = new WhitespaceTokenizer(new StringReader(inputText.toString())); ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); - final Token reusableToken = new Token(); + TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class); Iterator expectedIter = expectedOutputTokens.iterator(); - while (expectedIter.hasNext()) { - assertEquals(expectedIter.next(), filter.next(reusableToken).term()); + while (expectedIter.hasNext()) {; + assertTermEquals((String)expectedIter.next(), filter, termAtt); } - assertNull(filter.next(reusableToken)); + assertFalse(filter.incrementToken()); } + + void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception { + assertTrue(stream.incrementToken()); + assertEquals(expected, termAtt.term()); + } } Index: src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (revision 776655) +++ src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import java.io.IOException; import java.io.Reader; import java.util.HashMap; import java.util.Map; @@ -317,6 +318,10 @@ } private static class EmptyTokenStream extends TokenStream { + public boolean incrementToken() throws IOException { + return false; + } + public Token next(final Token reusableToken) { return null; }