Index: lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java =================================================================== --- lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (revision 1480831) +++ lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (working copy) @@ -27,7 +27,6 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; /** @@ -48,13 +47,11 @@ private final Matcher matcher = pattern.matcher(""); // encoded representation private String encoded; - // offsets for any buffered outputs - private int startOffset; - private int endOffset; + // preserves all attributes for any buffered outputs + private State state; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** @@ -83,10 +80,10 @@ @Override public boolean incrementToken() throws IOException { if (matcher.find()) { - clearAttributes(); + assert state != null && encoded != null; + restoreState(state); termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1)); posIncAtt.setPositionIncrement(0); - offsetAtt.setOffset(startOffset, endOffset); return true; } @@ -94,8 +91,7 @@ encoded = (languages == null) ? engine.encode(termAtt.toString()) : engine.encode(termAtt.toString(), languages); - startOffset = offsetAtt.startOffset(); - endOffset = offsetAtt.endOffset(); + state = captureState(); matcher.reset(encoded); if (matcher.find()) { termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1)); Index: lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java =================================================================== --- lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (revision 1480831) +++ lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (working copy) @@ -19,7 +19,9 @@ import java.io.IOException; import java.io.Reader; +import java.io.StringReader; import java.util.HashSet; +import java.util.regex.Pattern; import org.apache.commons.codec.language.bm.NameType; import org.apache.commons.codec.language.bm.PhoneticEngine; @@ -29,7 +31,10 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.junit.Ignore; /** Tests {@link BeiderMorseFilter} */ @@ -103,4 +108,20 @@ }; checkOneTermReuse(a, "", ""); } + + public void testCustomAttribute() throws IOException { + TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo")); + stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*")); + stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)); + KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class); + stream.reset(); + int i = 0; + while(stream.incrementToken()) { + assertTrue(keyAtt.isKeyword()); + i++; + } + assertEquals(12, i); + stream.end(); + stream.close(); + } }