Index: modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (revision 1096178) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (working copy) @@ -17,7 +17,11 @@ * limitations under the License. */ +import java.io.StringReader; + import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.util.Version; /** @@ -148,4 +152,17 @@ public void testRandomStrings() throws Exception { checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); } + + // LUCENE-3044 + public void testAttributeReuse() throws Exception { + assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); + ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30); + // just consume + TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); + assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); + // this consumer adds flagsAtt, which this analyzer does not use. 
+ ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); + ts.addAttribute(FlagsAttribute.class); + assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); + } } Index: modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (revision 1095935) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (working copy) @@ -131,5 +131,8 @@ public void reset() throws IOException { super.reset(); hasMoreTokensInClone = false; + clonedToken = null; + clonedTermAtt = null; + clonedOffsetAtt = null; } } Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 1096194) +++ lucene/contrib/CHANGES.txt (working copy) @@ -57,6 +57,10 @@ * LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered a zero-length token. (Robert Muir) + + * LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly; this only + caused a problem if you consumed a tokenstream, then reused it, added different + attributes to it, and consumed it again. (Robert Muir, Uwe Schindler) New Features