Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 793530)
+++ CHANGES.txt (working copy)
@@ -135,6 +135,13 @@
     true in all Lucene releases before 2.3, but was broken in 2.3 and
     2.4, and is now fixed in 2.9. (Mike McCandless)
 
+11. LUCENE-1678: The addition of Analyzer.reusableTokenStream
+    accidentally broke back compatibility of external analyzers that
+    subclassed core analyzers that implemented tokenStream but not
+    reusableTokenStream. This is now fixed, such that if
+    reusableTokenStream is invoked on such a subclass, that method
+    will forcefully fall back to tokenStream. (Mike McCandless)
+
 API Changes
 
  1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
Index: src/test/org/apache/lucene/analysis/TestAnalyzers.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestAnalyzers.java (revision 793530)
+++ src/test/org/apache/lucene/analysis/TestAnalyzers.java (working copy)
@@ -19,8 +19,10 @@
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.io.Reader;
 
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.Payload;
@@ -130,6 +132,21 @@
     x = StandardTokenizer.CJ;
     String[] y = StandardTokenizer.TOKEN_TYPES;
   }
+
+  private static class MyStandardAnalyzer extends StandardAnalyzer {
+    public TokenStream tokenStream(String field, Reader reader) {
+      return new WhitespaceAnalyzer().tokenStream(field, reader);
+    }
+  }
+
+  public void testSubclassOverridingOnlyTokenStream() throws Throwable {
+    Analyzer a = new MyStandardAnalyzer();
+    TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
+    // StandardAnalyzer will discard "the" (it's a
+    // stopword), but my subclass will not:
+    assertTrue(ts.incrementToken());
+    assertFalse(ts.incrementToken());
+  }
 }
 
 class PayloadSetter extends TokenFilter {
Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy)
@@ -44,7 +44,6 @@
  */
 public class StandardAnalyzer extends Analyzer {
   private Set stopSet;
-  private Version matchVersion;
 
   /**
    * Specifies whether deprecated acronyms should be replaced with HOST type.
@@ -262,7 +261,7 @@
   }
 
   private final void init(Version matchVersion) {
-    this.matchVersion = matchVersion;
+    setOverridesTokenStreamMethod(StandardAnalyzer.class);
     if (matchVersion.onOrAfter(Version.LUCENE_29)) {
       enableStopPositionIncrements = true;
     } else {
@@ -314,6 +313,12 @@
 
   /** @deprecated Use {@link #tokenStream} instead */
   public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    if (overridesTokenStreamMethod) {
+      // LUCENE-1678: force fallback to tokenStream() if we
+      // have been subclassed and that subclass overrides
+      // tokenStream but not reusableTokenStream
+      return tokenStream(fieldName, reader);
+    }
     SavedStreams streams = (SavedStreams) getPreviousTokenStream();
     if (streams == null) {
       streams = new SavedStreams();
Index: src/java/org/apache/lucene/analysis/Analyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/Analyzer.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy)
@@ -19,6 +19,7 @@
 
 import java.io.Reader;
 import java.io.IOException;
+import java.lang.reflect.Method;
 
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -32,7 +33,8 @@
  */
 public abstract class Analyzer {
   /** Creates a TokenStream which tokenizes all the text in the provided
-   * Reader. Must be able to handle null field name for backward compatibility.
+   * Reader. Must be able to handle null field name for
+   * backward compatibility.
   */
   public abstract TokenStream tokenStream(String fieldName, Reader reader);
 
@@ -79,7 +81,30 @@
 
     }
   }
 
+  protected boolean overridesTokenStreamMethod;
+
+  /** @deprecated This is only present to preserve
+   *  back-compat of classes that subclass a core analyzer
+   *  and override tokenStream but not reusableTokenStream */
+  protected void setOverridesTokenStreamMethod(Class baseClass) {
+
+    final Class[] params = new Class[2];
+    params[0] = String.class;
+    params[1] = Reader.class;
+
+    try {
+      Method m = this.getClass().getMethod("tokenStream", params);
+      if (m != null) {
+        overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
+      } else {
+        overridesTokenStreamMethod = false;
+      }
+    } catch (NoSuchMethodException nsme) {
+      overridesTokenStreamMethod = false;
+    }
+  }
+
   /**
    * Invoked before indexing a Fieldable instance if
    * terms have already been added to that field. This allows custom
Index: src/java/org/apache/lucene/analysis/KeywordAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (working copy)
@@ -25,18 +25,27 @@
  * for data like zip codes, ids, and some product names.
 */
 public class KeywordAnalyzer extends Analyzer {
+  public KeywordAnalyzer() {
+    setOverridesTokenStreamMethod(KeywordAnalyzer.class);
+  }
   public TokenStream tokenStream(String fieldName,
                                  final Reader reader) {
     return new KeywordTokenizer(reader);
   }
   public TokenStream reusableTokenStream(String fieldName,
                                          final Reader reader) throws IOException {
+    if (overridesTokenStreamMethod) {
+      // LUCENE-1678: force fallback to tokenStream() if we
+      // have been subclassed and that subclass overrides
+      // tokenStream but not reusableTokenStream
+      return tokenStream(fieldName, reader);
+    }
     Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
     if (tokenizer == null) {
       tokenizer = new KeywordTokenizer(reader);
       setPreviousTokenStream(tokenizer);
     } else
-       tokenizer.reset(reader);
+      tokenizer.reset(reader);
     return tokenizer;
   }
 }
Index: src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java
===================================================================
--- src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (working copy)
@@ -55,6 +55,7 @@
    */
   public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
     this.defaultAnalyzer = defaultAnalyzer;
+    setOverridesTokenStreamMethod(PerFieldAnalyzerWrapper.class);
   }
 
   /**
@@ -77,6 +78,12 @@
   }
 
   public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    if (overridesTokenStreamMethod) {
+      // LUCENE-1678: force fallback to tokenStream() if we
+      // have been subclassed and that subclass overrides
+      // tokenStream but not reusableTokenStream
+      return tokenStream(fieldName, reader);
+    }
     Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName);
     if (analyzer == null)
       analyzer = defaultAnalyzer;