Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (revision 948225) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.IndexableBinaryStringTools; import java.io.IOException; @@ -70,7 +70,7 @@ public final class ICUCollationKeyFilter extends TokenFilter { private Collator collator = null; private RawCollationKey reusableKey = new RawCollationKey(); - private TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * @@ -80,23 +80,22 @@ public ICUCollationKeyFilter(TokenStream input, Collator collator) { super(input); this.collator = collator; - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - char[] termBuffer = termAtt.termBuffer(); - String termText = new String(termBuffer, 0, termAtt.termLength()); + char[] termBuffer = termAtt.buffer(); + String termText = new String(termBuffer, 0, termAtt.length()); collator.getRawCollationKey(termText, reusableKey); int encodedLength = IndexableBinaryStringTools.getEncodedLength( reusableKey.bytes, 0, reusableKey.size); if (encodedLength > termBuffer.length) { - termAtt.resizeTermBuffer(encodedLength); + termAtt.resizeBuffer(encodedLength); } - termAtt.setTermLength(encodedLength); + termAtt.setLength(encodedLength); IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size, - termAtt.termBuffer(), 0, encodedLength); + termAtt.buffer(), 0, 
encodedLength); return true; } else { return false; Index: modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java =================================================================== --- modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (revision 948225) +++ modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (working copy) @@ -21,8 +21,8 @@ import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -44,31 +44,22 @@ private int tokenStart = 0, tokenEnd = 0; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public SentenceTokenizer(Reader reader) { super(reader); - init(); } public SentenceTokenizer(AttributeSource source, Reader reader) { super(source, reader); - init(); } public SentenceTokenizer(AttributeFactory factory, Reader reader) { super(factory, reader); - init(); } - private void init() { - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - } - @Override public boolean incrementToken() throws IOException { clearAttributes(); @@ -112,7 +103,7 @@ if (buffer.length() == 0) return false; else { - termAtt.setTermBuffer(buffer.toString()); + termAtt.setEmpty().append(buffer); offsetAtt.setOffset(correctOffset(tokenStart), 
correctOffset(tokenEnd)); typeAtt.setType("sentence"); return true; Index: modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java =================================================================== --- modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (revision 948225) +++ modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (working copy) @@ -24,8 +24,8 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.cn.smart.hhmm.SegToken; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; /** @@ -40,9 +40,9 @@ private List tokenBuffer; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); /** * Construct a new WordTokenizer. @@ -52,9 +52,6 @@ public WordTokenFilter(TokenStream in) { super(in); this.wordSegmenter = new WordSegmenter(); - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override @@ -63,7 +60,7 @@ // there are no remaining tokens from the current sentence... are there more sentences? if (input.incrementToken()) { // a new sentence is available: process it. 
- tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset()); + tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset()); tokenIter = tokenBuffer.iterator(); /* * it should not be possible to have a sentence with 0 words, check just in case. @@ -79,7 +76,7 @@ clearAttributes(); // There are remaining tokens from the current sentence, return the next one. SegToken nextWord = tokenIter.next(); - termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length); + termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length); offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset); typeAtt.setType("word"); return true; Index: modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (working copy) @@ -17,8 +17,6 @@ * limitations under the License. 
*/ -import java.io.File; -import java.io.FileInputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; @@ -27,7 +25,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { public void testHyphenationCompoundWordsDA() throws Exception { @@ -176,15 +174,15 @@ CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false); - TermAttribute termAtt = tf.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class); assertTrue(tf.incrementToken()); - assertEquals("Rindfleischüberwachungsgesetz", termAtt.term()); + assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString()); assertTrue(tf.incrementToken()); - assertEquals("Rind", termAtt.term()); + assertEquals("Rind", termAtt.toString()); wsTokenizer.reset(new StringReader("Rindfleischüberwachungsgesetz")); tf.reset(); assertTrue(tf.incrementToken()); - assertEquals("Rindfleischüberwachungsgesetz", termAtt.term()); + assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString()); } private Reader getHyphenationReader() throws Exception { Index: modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (working copy) @@ -26,7 +26,6 @@ import org.apache.lucene.analysis.core.LetterTokenizer; import 
org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; @@ -176,9 +175,6 @@ QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); a.addStopWords(reader, 10); TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring")); - TermAttribute termAtt = ts.getAttribute(TermAttribute.class); - assertTrue(ts.incrementToken()); - assertEquals("this", termAtt.term()); - assertFalse(ts.incrementToken()); + assertTokenStreamContents(ts, new String[] { "this" }); } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (working copy) @@ -28,6 +28,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; /** @@ -50,9 +51,9 @@ private List filter(TokenFilter filter) throws IOException { List tas = new ArrayList(); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); while (filter.incrementToken()) { - tas.add(termAtt.term()); + tas.add(termAtt.toString()); } return tas; } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java 
=================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (working copy) @@ -21,46 +21,22 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.util.Version; public class TestReverseStringFilter extends BaseTokenStreamTestCase { public void testFilter() throws Exception { TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Do have a nice day")); // 1-4 length string ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream); - TermAttribute text = filter.getAttribute(TermAttribute.class); - assertTrue(filter.incrementToken()); - assertEquals("oD", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("evah", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("a", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("ecin", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("yad", text.term()); - assertFalse(filter.incrementToken()); + assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" }); } public void testFilterWithMark() throws Exception { TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader( "Do have a nice day")); // 1-4 length string ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001'); - TermAttribute text = filter - .getAttribute(TermAttribute.class); - assertTrue(filter.incrementToken()); - assertEquals("\u0001oD", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("\u0001evah", text.term()); - 
assertTrue(filter.incrementToken()); - assertEquals("\u0001a", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("\u0001ecin", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("\u0001yad", text.term()); - assertFalse(filter.incrementToken()); + assertTokenStreamContents(filter, + new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" }); } public void testReverseString() throws Exception { Index: modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (working copy) @@ -18,8 +18,8 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.index.Payload; import org.apache.lucene.util.LuceneTestCase; @@ -32,7 +32,7 @@ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder()); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); @@ -70,7 +70,7 @@ public void testFloatEncoding() throws Exception { String test = "The quick|1.0 red|2.0 fox|3.5 
jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7"; DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder()); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f)); @@ -88,7 +88,7 @@ public void testIntEncoding() throws Exception { String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83"; DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder()); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1)); @@ -104,10 +104,10 @@ } void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception { - TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class); assertTrue(stream.incrementToken()); - assertEquals(expected, termAtt.term()); + assertEquals(expected, termAtt.toString()); Payload payload = payloadAtt.getPayload(); if (payload != null) { assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length); @@ -121,9 +121,9 @@ } - void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, 
PayloadAttribute payAtt, byte[] expectPay) throws Exception { + void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception { assertTrue(stream.incrementToken()); - assertEquals(expected, termAtt.term()); + assertEquals(expected, termAtt.toString()); Payload payload = payAtt.getPayload(); if (payload != null) { assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length); Index: modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java (working copy) @@ -20,8 +20,8 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import java.io.IOException; @@ -39,11 +39,11 @@ NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D"); boolean seenDogs = false; - TermAttribute termAtt = nptf.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class); TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class); while (nptf.incrementToken()) { - if (termAtt.term().equals("dogs")) { + if 
(termAtt.toString().equals("dogs")) { seenDogs = true; assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true); assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null); @@ -60,19 +60,17 @@ } private final class WordTokenFilter extends TokenFilter { - private TermAttribute termAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private WordTokenFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - if (termAtt.term().equals("dogs")) + if (termAtt.toString().equals("dogs")) typeAtt.setType("D"); return true; } else { Index: modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import java.io.IOException; @@ -39,12 +39,12 @@ TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)))); int count = 0; - TermAttribute termAtt = 
nptf.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class); TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class); while (nptf.incrementToken()) { - assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0])))); + assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0])))); assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null); String type = new String(payloadAtt.getPayload().getData(), "UTF-8"); assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true); @@ -55,19 +55,17 @@ } private final class WordTokenFilter extends TokenFilter { - private TermAttribute termAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private WordTokenFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))); + typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))); return true; } else { return false; Index: modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (working copy) @@ -22,11 +22,11 @@ import 
org.apache.lucene.index.Payload; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.Version; @@ -93,7 +93,7 @@ public void testFilterTokens() throws Exception { SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English"); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class); TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class); @@ -102,7 +102,7 @@ filter.incrementToken(); - assertEquals("accent", termAtt.term()); + assertEquals("accent", termAtt.toString()); assertEquals(2, offsetAtt.startOffset()); assertEquals(7, offsetAtt.endOffset()); assertEquals("wrd", typeAtt.type()); @@ -112,27 +112,21 @@ } private final class TestTokenStream extends TokenStream { - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; - private PayloadAttribute payloadAtt; - private PositionIncrementAttribute posIncAtt; - private FlagsAttribute flagsAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PayloadAttribute 
payloadAtt = addAttribute(PayloadAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); TestTokenStream() { super(); - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - payloadAtt = addAttribute(PayloadAttribute.class); - posIncAtt = addAttribute(PositionIncrementAttribute.class); - flagsAtt = addAttribute(FlagsAttribute.class); } @Override public boolean incrementToken() { clearAttributes(); - termAtt.setTermBuffer("accents"); + termAtt.setEmpty().append("accents"); offsetAtt.setOffset(2, 7); typeAtt.setType("wrd"); posIncAtt.setPositionIncrement(3); Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (working copy) @@ -17,17 +17,13 @@ * limitations under the License. 
*/ -import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.Reader; -import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -65,8 +61,8 @@ new RussianLetterTokenizer(TEST_VERSION_CURRENT, sampleUnicode); - TermAttribute text = in.getAttribute(TermAttribute.class); - TermAttribute sampleText = sample.getAttribute(TermAttribute.class); + CharTermAttribute text = in.getAttribute(CharTermAttribute.class); + CharTermAttribute sampleText = sample.getAttribute(CharTermAttribute.class); for (;;) { @@ -76,34 +72,21 @@ boolean nextSampleToken = sample.incrementToken(); assertEquals( "Unicode", - text.term(), + text.toString(), nextSampleToken == false ? 
null - : sampleText.term()); + : sampleText.toString()); } inWords.close(); sampleUnicode.close(); } - public void testDigitsInRussianCharset() + /** Check that RussianAnalyzer doesn't discard any numbers */ + public void testDigitsInRussianCharset() throws IOException { - Reader reader = new StringReader("text 1000"); - RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT); - TokenStream stream = ra.tokenStream("", reader); - - TermAttribute termText = stream.getAttribute(TermAttribute.class); - try { - assertTrue(stream.incrementToken()); - assertEquals("text", termText.term()); - assertTrue(stream.incrementToken()); - assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term()); - assertFalse(stream.incrementToken()); - } - catch (IOException e) - { - fail("unexpected IOException"); - } + RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" }); } /** @deprecated remove this test in Lucene 4.0: stopwords changed */ Index: modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase { @@ -41,11 +41,11 @@ boolean seenDogs = false; - TermAttribute termAtt = 
ttf.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ttf.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class); ttf.reset(); while (ttf.incrementToken()) { - if (termAtt.term().equals("dogs")) { + if (termAtt.toString().equals("dogs")) { seenDogs = true; assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true); } else { @@ -64,20 +64,18 @@ } private class WordTokenFilter extends TokenFilter { - private TermAttribute termAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private WordTokenFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override public final boolean incrementToken() throws IOException { if (!input.incrementToken()) return false; - if (termAtt.term().equals("dogs")) { + if (termAtt.toString().equals("dogs")) { typeAtt.setType("D"); } return true; Index: modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (working copy) @@ -26,8 +26,8 @@ import org.apache.lucene.analysis.core.LetterTokenizer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; import 
org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; @@ -159,11 +159,11 @@ int j = -1; PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { j += posIncrAtt.getPositionIncrement(); - String termText = termAtt.term(); + String termText = termAtt.toString(); q.add(new Term("content", termText), j); } @@ -186,10 +186,10 @@ TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence")); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - String termText = termAtt.term(); + String termText = termAtt.toString(); q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD); } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (working copy) @@ -31,7 +31,12 @@ import org.apache.lucene.analysis.payloads.PayloadHelper; import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix; import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column; -import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute; public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { @@ -415,7 +420,7 @@ private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) { Token token = new Token(startOffset, endOffset); - token.setTermBuffer(text); + token.setEmpty().append(text); token.setPositionIncrement(posIncr); return token; } @@ -427,7 +432,7 @@ private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) { Token token = new Token(startOffset, endOffset); - token.setTermBuffer(text); + token.setEmpty().append(text); token.setPositionIncrement(posIncr); ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); return token; @@ -435,7 +440,7 @@ private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset, ShingleMatrixFilter.TokenPositioner positioner) { Token token = new Token(startOffset, endOffset); - token.setTermBuffer(text); + token.setEmpty().append(text); token.setPositionIncrement(posIncr); ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); ShingleMatrixFilter.defaultSettingsCodec.setTokenPositioner(token, positioner); @@ -445,20 +450,20 @@ // assert-methods start here private void assertNext(TokenStream ts, String text) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); assertTrue(ts.incrementToken()); - assertEquals(text, termAtt.term()); + assertEquals(text, termAtt.toString()); } private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); PayloadAttribute payloadAtt = 
ts.addAttribute(PayloadAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); assertTrue(ts.incrementToken()); - assertEquals(text, termAtt.term()); + assertEquals(text, termAtt.toString()); assertEquals(positionIncrement, posIncrAtt.getPositionIncrement()); assertEquals(boost, payloadAtt.getPayload() == null ? 1f : PayloadHelper.decodeFloat(payloadAtt.getPayload().getData()), 0); assertEquals(startOffset, offsetAtt.startOffset()); @@ -466,11 +471,11 @@ } private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); assertTrue(ts.incrementToken()); - assertEquals(text, termAtt.term()); + assertEquals(text, termAtt.toString()); assertEquals(startOffset, offsetAtt.startOffset()); assertEquals(endOffset, offsetAtt.endOffset()); } @@ -478,7 +483,7 @@ private static Token createToken(String term, int start, int offset) { Token token = new Token(start, offset); - token.setTermBuffer(term); + token.setEmpty().append(term); return token; } @@ -486,21 +491,15 @@ public final static class TokenListStream extends TokenStream { private Collection tokens; - TermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; - PayloadAttribute payloadAtt; - OffsetAttribute offsetAtt; - TypeAttribute typeAtt; - FlagsAttribute flagsAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final FlagsAttribute flagsAtt = 
addAttribute(FlagsAttribute.class); public TokenListStream(Collection tokens) { this.tokens = tokens; - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - payloadAtt = addAttribute(PayloadAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - flagsAtt = addAttribute(FlagsAttribute.class); } private Iterator iterator; @@ -515,7 +514,7 @@ } Token prototype = iterator.next(); clearAttributes(); - termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength()); + termAtt.copyBuffer(prototype.buffer(), 0, prototype.length()); posIncrAtt.setPositionIncrement(prototype.getPositionIncrement()); flagsAtt.setFlags(prototype.getFlags()); offsetAtt.setOffset(prototype.startOffset(), prototype.endOffset()); Index: modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.shingle.ShingleFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class PositionFilterTest extends BaseTokenStreamTestCase { @@ -30,19 +30,18 @@ protected int index = 0; protected String[] testToken; - protected TermAttribute termAtt; + protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public TestTokenStream(String[] testToken) { super(); this.testToken = testToken; - termAtt = addAttribute(TermAttribute.class); } @Override public final boolean incrementToken() throws 
IOException { clearAttributes(); if (index < testToken.length) { - termAtt.setTermBuffer(testToken[index++]); + termAtt.setEmpty().append(testToken[index++]); return true; } else { return false; Index: modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java (revision 948225) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java (working copy) @@ -20,30 +20,20 @@ import java.io.StringReader; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import java.util.Set; import java.util.HashSet; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import static org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.*; /** - * - * + * Basic Tests for {@link WikipediaTokenizer} **/ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { protected static final String LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]"; - public WikipediaTokenizerTest(String s) { - super(s); - } - public void testSimple() throws Exception { String text = "This is a [[Category:foo]]"; WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(text)); @@ -51,216 +41,85 @@ new String[] { "This", "is", "a", "foo" }, new int[] { 0, 5, 8, 21 }, new int[] { 4, 7, 9, 24 }, - new String[] { "", "", "", WikipediaTokenizer.CATEGORY }, + new String[] { "", "", "", CATEGORY }, new int[] { 1, 1, 1, 1, }, 
text.length()); } public void testHandwritten() throws Exception { - //make sure all tokens are in only one type - String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " + - "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] " + - "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " + - " This is a [[link|display info]] This is a period. Here is $3.25 and here is 3.50. Here's Johnny. " + - "==heading== ===sub head=== followed by some text [[Category:blah| ]] " + - "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed." + - "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" + - " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" + - " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] Citation martian code"; - Map tcm = new HashMap();//map tokens to types - tcm.put("link", WikipediaTokenizer.INTERNAL_LINK); - tcm.put("display", WikipediaTokenizer.INTERNAL_LINK); - tcm.put("info", WikipediaTokenizer.INTERNAL_LINK); - - tcm.put("http://lucene.apache.org", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("http://foo.boo.com/test/test/", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("http://foo.boo.com/test/test/test.html", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("http://foo.boo.com/test/test/test.html?g=b&c=d", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("Test", WikipediaTokenizer.EXTERNAL_LINK); + // make sure all tokens are in only one type + String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " + + "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] " + + "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " + + " This is a [[link|display info]] This is a period. 
Here is $3.25 and here is 3.50. Here's Johnny. " + + "==heading== ===sub head=== followed by some text [[Category:blah| ]] " + + "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed." + + "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" + + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" + + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] Citation martian code"; - //alphanums - tcm.put("This", ""); - tcm.put("is", ""); - tcm.put("a", ""); - tcm.put("Category", ""); - tcm.put("linked", ""); - tcm.put("parens", ""); - tcm.put("external", ""); - tcm.put("URL", ""); - tcm.put("and", ""); - tcm.put("period", ""); - tcm.put("Here", ""); - tcm.put("Here's", ""); - tcm.put("here", ""); - tcm.put("Johnny", ""); - tcm.put("followed", ""); - tcm.put("by", ""); - tcm.put("text", ""); - tcm.put("that", ""); - tcm.put("but", ""); - tcm.put("never", ""); - tcm.put("closed", ""); - tcm.put("goes", ""); - tcm.put("for", ""); - tcm.put("this", ""); - tcm.put("an", ""); - tcm.put("some", ""); - tcm.put("martian", ""); - tcm.put("code", ""); - - tcm.put("foo", WikipediaTokenizer.CATEGORY); - tcm.put("bar", WikipediaTokenizer.CATEGORY); - tcm.put("none", WikipediaTokenizer.CATEGORY); - tcm.put("withstanding", WikipediaTokenizer.CATEGORY); - tcm.put("blah", WikipediaTokenizer.CATEGORY); - tcm.put("ital", WikipediaTokenizer.CATEGORY); - tcm.put("cat", WikipediaTokenizer.CATEGORY); - - tcm.put("italics", WikipediaTokenizer.ITALICS); - tcm.put("more", WikipediaTokenizer.ITALICS); - tcm.put("bold", WikipediaTokenizer.BOLD); - tcm.put("same", WikipediaTokenizer.BOLD); - tcm.put("five", WikipediaTokenizer.BOLD_ITALICS); - tcm.put("and2", WikipediaTokenizer.BOLD_ITALICS); - tcm.put("quotes", WikipediaTokenizer.BOLD_ITALICS); - - tcm.put("heading", WikipediaTokenizer.HEADING); - tcm.put("sub", WikipediaTokenizer.SUB_HEADING); - tcm.put("head", 
WikipediaTokenizer.SUB_HEADING); - - tcm.put("Citation", WikipediaTokenizer.CITATION); - - tcm.put("3.25", ""); - tcm.put("3.50", ""); WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test)); - int count = 0; - int numItalics = 0; - int numBoldItalics = 0; - int numCategory = 0; - int numCitation = 0; - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class); - - while (tf.incrementToken()) { - String tokText = termAtt.term(); - //System.out.println("Text: " + tokText + " Type: " + token.type()); - String expectedType = tcm.get(tokText); - assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null); - assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true); - count++; - if (typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true){ - numItalics++; - } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS) == true){ - numBoldItalics++; - } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true){ - numCategory++; - } - else if (typeAtt.type().equals(WikipediaTokenizer.CITATION) == true){ - numCitation++; - } - } - assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size()); - assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4); - assertTrue(numBoldItalics + " does not equal: " + 3 + " for numBoldItalics", numBoldItalics == 3); - assertTrue(numCategory + " does not equal: " + 10 + " for numCategory", numCategory == 10); - assertTrue(numCitation + " does not equal: " + 1 + " for numCitation", numCitation == 1); + assertTokenStreamContents(tf, + new String[] {"link", "This", "is", "a", + "foo", "Category", "This", "is", "a", "linked", "bar", "none", + "withstanding", "Category", "This", "is", "parens", "This", "is", "a", + "link", "This", "is", "an", "external", "URL", + 
"http://lucene.apache.org", "Here", "is", "italics", "and", "more", + "italics", "bold", "and", "five", "quotes", "This", "is", "a", "link", + "display", "info", "This", "is", "a", "period", "Here", "is", "3.25", + "and", "here", "is", "3.50", "Here's", "Johnny", "heading", "sub", + "head", "followed", "by", "some", "text", "blah", "ital", "cat", + "here", "is", "some", "that", "is", "italics", "foo", "but", "is", + "never", "closed", "same", "foo", "goes", "for", "this", "and2", "foo", + "and", "this", "http://foo.boo.com/test/test/", "Test", "Test", + "http://foo.boo.com/test/test/test.html", "Test", "Test", + "http://foo.boo.com/test/test/test.html?g=b&c=d", "Test", "Test", + "Citation", "martian", "code"}, + new String[] {INTERNAL_LINK, + "", "", "", CATEGORY, "", + "", "", "", "", CATEGORY, + CATEGORY, CATEGORY, "", "", "", + "", "", "", "", INTERNAL_LINK, + "", "", "", "", "", + EXTERNAL_LINK_URL, "", "", ITALICS, "", + ITALICS, ITALICS, BOLD, "", BOLD_ITALICS, BOLD_ITALICS, + "", "", "", INTERNAL_LINK, INTERNAL_LINK, + INTERNAL_LINK, "", "", "", "", + "", "", "", "", "", + "", "", "", "", HEADING, + SUB_HEADING, SUB_HEADING, "", "", "", + "", CATEGORY, CATEGORY, CATEGORY, "", "", + "", "", "", ITALICS, CATEGORY, + "", "", "", "", BOLD, CATEGORY, + "", "", "", BOLD_ITALICS, CATEGORY, + "", "", EXTERNAL_LINK_URL, EXTERNAL_LINK, + EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, + EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, CITATION, + "", ""}); } public void testLinkPhrases() throws Exception { - WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES)); checkLinkPhrases(tf); - } private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException { - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "click", 
termAtt.term().equals("click") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - //The link, and here should be at the same position for phrases to work - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "again", - termAtt.term().equals("again") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "click", - termAtt.term().equals("click") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org", - termAtt.term().equals("http://lucene.apache.org") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "again", - termAtt.term().equals("again") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " 
+ 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "a", - termAtt.term().equals("a") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "b", - termAtt.term().equals("b") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "c", - termAtt.term().equals("c") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "d", - termAtt.term().equals("d") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertFalse(tf.incrementToken()); + assertTokenStreamContents(tf, + new String[] { "click", "link", "here", "again", "click", + "http://lucene.apache.org", "here", "again", "a", "b", "c", "d" }, + new int[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1 }); } public void testLinks() throws Exception { String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]"; WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test)); - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news", - termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true); - assertTrue(typeAtt.type() + " is not 
equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - tf.incrementToken();//skip here - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c", - termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - tf.incrementToken();//skip here - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c", - termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - - assertTrue(tf.incrementToken()); - assertFalse(tf.incrementToken()); + assertTokenStreamContents(tf, + new String[] { "http://lucene.apache.org/java/docs/index.html#news", "here", + "http://lucene.apache.org/java/docs/index.html?b=c", "here", + "https://lucene.apache.org/java/docs/index.html?b=c", "here" }, + new String[] { EXTERNAL_LINK_URL, EXTERNAL_LINK, + EXTERNAL_LINK_URL, EXTERNAL_LINK, + EXTERNAL_LINK_URL, EXTERNAL_LINK, }); } public void testLucene1133() throws Exception { @@ -272,73 +131,13 @@ checkLinkPhrases(tf); String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]"; tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.UNTOKENIZED_ONLY, untoks); - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class); - OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class); - - 
assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "a b c d", - termAtt.term().equals("a b c d") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "e f g", - termAtt.term().equals("e f g") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", - termAtt.term().equals("link") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", - termAtt.term().equals("link") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "there", - termAtt.term().equals("there") == true); - - assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 
61); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "italics here", - termAtt.term().equals("italics here") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "something", - termAtt.term().equals("something") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "more italics", - termAtt.term().equals("more italics") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "h i j", - termAtt.term().equals("h i j") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133); - - assertFalse(tf.incrementToken()); + assertTokenStreamContents(tf, + new String[] { "a b c d", "e f g", "link", "here", "link", + "there", "italics here", "something", "more italics", "h i j" }, + new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 }, + new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 }, + new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + ); } public void testBoth() throws Exception { @@ -348,211 +147,26 @@ String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' 
something ''more italics'' [[Category:h i j]]"; //should output all the indivual tokens plus the untokenized tokens as well. Untokenized tokens WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks); - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class); - PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class); - OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class); - FlagsAttribute flagsAtt = tf.addAttribute(FlagsAttribute.class); + assertTokenStreamContents(tf, + new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g", + "link", "here", "link", "there", "italics here", "italics", "here", + "something", "more italics", "more", "italics", "h i j", "h", "i", "j" }, + new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 }, + new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 }, + new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 } + ); - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "a b c d", - termAtt.term().equals("a b c d") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18); - - assertTrue(tf.incrementToken()); - 
assertTrue(termAtt.term() + " is not equal to " + "a", - termAtt.term().equals("a") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(flagsAtt.getFlags() + " equals: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG + " and it shouldn't", flagsAtt.getFlags() != WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 12, offsetAtt.endOffset() == 12); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "b", - termAtt.term().equals("b") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 13, offsetAtt.startOffset() == 13); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 14, offsetAtt.endOffset() == 14); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "c", - termAtt.term().equals("c") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 15, offsetAtt.startOffset() == 15); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 16, offsetAtt.endOffset() == 16); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "d", - 
termAtt.term().equals("d") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 17, offsetAtt.startOffset() == 17); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18); - - - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "e f g", - termAtt.term().equals("e f g") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "e", - termAtt.term().equals("e") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 33, offsetAtt.endOffset() == 33); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "f", - termAtt.term().equals("f") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - 
assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 34, offsetAtt.startOffset() == 34); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 35, offsetAtt.endOffset() == 35); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "g", - termAtt.term().equals("g") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 36, offsetAtt.startOffset() == 36); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", - termAtt.term().equals("link") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47); - 
assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", - termAtt.term().equals("link") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "there", - termAtt.term().equals("there") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "italics here", - termAtt.term().equals("italics here") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71); - assertTrue(offsetAtt.endOffset() + " 
does not equal: " + 83, offsetAtt.endOffset() == 83); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "italics", - termAtt.term().equals("italics") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 78, offsetAtt.endOffset() == 78); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 79, offsetAtt.startOffset() == 79); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "something", - termAtt.term().equals("something") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "more italics", - termAtt.term().equals("more italics") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to 
" + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "more", - termAtt.term().equals("more") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 102, offsetAtt.endOffset() == 102); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "italics", - termAtt.term().equals("italics") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true); - - assertTrue(offsetAtt.startOffset() + " does not equal: " + 103, offsetAtt.startOffset() == 103); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "h i j", - termAtt.term().equals("h i j") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, 
typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "h", - termAtt.term().equals("h") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 125, offsetAtt.endOffset() == 125); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "i", - termAtt.term().equals("i") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 128, offsetAtt.startOffset() == 128); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 129, offsetAtt.endOffset() == 129); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "j", - termAtt.term().equals("j") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true); - 
assertTrue(offsetAtt.startOffset() + " does not equal: " + 132, offsetAtt.startOffset() == 132); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133); - + // now check the flags, TODO: add way to check flags from BaseTokenStreamTestCase? + tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks); + int expectedFlags[] = new int[] { UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, + 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0 }; + FlagsAttribute flagsAtt = tf.addAttribute(FlagsAttribute.class); + tf.reset(); + for (int i = 0; i < expectedFlags.length; i++) { + assertTrue(tf.incrementToken()); + assertEquals("flags " + i, expectedFlags[i], flagsAtt.getFlags()); + } assertFalse(tf.incrementToken()); + tf.close(); } } Index: modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (working copy) @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -69,12 +69,12 @@ protected final int maxSubwordSize; protected final boolean onlyLongestMatch; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private 
FlagsAttribute flagsAtt; - private PositionIncrementAttribute posIncAtt; - private TypeAttribute typeAtt; - private PayloadAttribute payloadAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); private final Token wrapper = new Token(); /** @@ -160,13 +160,6 @@ this.dictionary = new CharArraySet(matchVersion, dictionary.size(), false); addAllLowerCase(this.dictionary, dictionary); } - - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - flagsAtt = addAttribute(FlagsAttribute.class); - posIncAtt = addAttribute(PositionIncrementAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - payloadAtt = addAttribute(PayloadAttribute.class); } /** @@ -192,7 +185,7 @@ private final void setToken(final Token token) throws IOException { clearAttributes(); - termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); + termAtt.copyBuffer(token.termBuffer(), 0, token.termLength()); flagsAtt.setFlags(token.getFlags()); typeAtt.setType(token.type()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); @@ -210,7 +203,7 @@ if (!input.incrementToken()) return false; - wrapper.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + wrapper.setTermBuffer(termAtt.buffer(), 0, termAtt.length()); wrapper.setStartOffset(offsetAtt.startOffset()); wrapper.setEndOffset(offsetAtt.endOffset()); wrapper.setFlags(flagsAtt.getFlags()); Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java 
=================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the @@ -30,22 +30,19 @@ */ public final class PersianNormalizationFilter extends TokenFilter { + private final PersianNormalizer normalizer = new PersianNormalizer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final PersianNormalizer normalizer; - private final TermAttribute termAtt; - public PersianNormalizationFilter(TokenStream input) { super(input); - normalizer = new PersianNormalizer(); - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt - .termLength()); - termAtt.setTermLength(newlen); + final int newlen = normalizer.normalize(termAtt.buffer(), + termAtt.length()); + termAtt.setLength(newlen); return true; } return false; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import 
org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link BulgarianStemmer} to stem Bulgarian @@ -35,23 +35,20 @@ *

*/ public final class BulgarianStemFilter extends TokenFilter { - private final BulgarianStemmer stemmer; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final BulgarianStemmer stemmer = new BulgarianStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public BulgarianStemFilter(final TokenStream input) { super(input); - stemmer = new BulgarianStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that stems German words. @@ -45,11 +45,11 @@ /** * The actual token in the input stream. 
*/ - private GermanStemmer stemmer = null; + private GermanStemmer stemmer = new GermanStemmer(); private Set exclusionSet = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); /** * Creates a {@link GermanStemFilter} instance @@ -58,9 +58,6 @@ public GermanStemFilter( TokenStream in ) { super(in); - stemmer = new GermanStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** @@ -80,13 +77,13 @@ @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - String term = termAtt.term(); + String term = termAtt.toString(); // Check the exclusion table. if (!keywordAttr.isKeyword() && (exclusionSet == null || !exclusionSet.contains(term))) { String s = stemmer.stem(term); // If not stemmed, don't waste the time adjusting the token. 
if ((s != null) && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link HindiNormalizer} to normalize the @@ -39,7 +39,7 @@ public final class HindiNormalizationFilter extends TokenFilter { private final HindiNormalizer normalizer = new HindiNormalizer(); - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); public HindiNormalizationFilter(TokenStream input) { @@ -50,8 +50,8 @@ public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAtt.isKeyword()) - termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(), - termAtt.termLength())); + termAtt.setLength(normalizer.normalize(termAtt.buffer(), + termAtt.length())); return true; } return false; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java (revision 948225) +++ 
modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java (working copy) @@ -22,13 +22,13 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link HindiStemmer} to stem Hindi words. */ public final class HindiStemFilter extends TokenFilter { - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); private final HindiStemmer stemmer = new HindiStemmer(); @@ -40,7 +40,7 @@ public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAtt.isKeyword()) - termAtt.setTermLength(stemmer.stem(termAtt.termBuffer(), termAtt.termLength())); + termAtt.setLength(stemmer.stem(termAtt.buffer(), termAtt.length())); return true; } else { return false; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArrayMap; import 
org.apache.lucene.util.Version; @@ -34,7 +34,7 @@ public final class StemmerOverrideFilter extends TokenFilter { private final CharArrayMap dictionary; - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); /** @@ -56,9 +56,9 @@ public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms - String stem = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength()); + String stem = dictionary.get(termAtt.buffer(), 0, termAtt.length()); if (stem != null) { - termAtt.setTermBuffer(stem); + termAtt.setEmpty().append(stem); keywordAtt.setKeyword(true); } } Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (working copy) @@ -30,8 +30,8 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -332,8 +332,8 @@ private Matcher matcher; private int pos = 0; private static final Locale locale = Locale.getDefault(); - private TermAttribute termAtt = addAttribute(TermAttribute.class); - private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final 
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) { this.str = str; @@ -360,7 +360,7 @@ if (start != end) { // non-empty match (header/trailer) String text = str.substring(start, end); if (toLowerCase) text = text.toLowerCase(locale); - termAtt.setTermBuffer(text); + termAtt.setEmpty().append(text); offsetAtt.setOffset(start, end); return true; } @@ -392,8 +392,8 @@ private final boolean toLowerCase; private final Set stopWords; private static final Locale locale = Locale.getDefault(); - private TermAttribute termAtt = addAttribute(TermAttribute.class); - private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set stopWords) { this.str = str; @@ -446,7 +446,7 @@ { return false; } - termAtt.setTermBuffer(text); + termAtt.setEmpty().append(text); offsetAtt.setOffset(start, i); return true; } Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import 
org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.index.Payload; @@ -44,14 +44,14 @@ private TokenStream prefix; private TokenStream suffix; - private TermAttribute termAtt; + private CharTermAttribute termAtt; private PositionIncrementAttribute posIncrAtt; private PayloadAttribute payloadAtt; private OffsetAttribute offsetAtt; private TypeAttribute typeAtt; private FlagsAttribute flagsAtt; - private TermAttribute p_termAtt; + private CharTermAttribute p_termAtt; private PositionIncrementAttribute p_posIncrAtt; private PayloadAttribute p_payloadAtt; private OffsetAttribute p_offsetAtt; @@ -64,14 +64,14 @@ this.prefix = prefix; prefixExhausted = false; - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); payloadAtt = addAttribute(PayloadAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); typeAtt = addAttribute(TypeAttribute.class); flagsAtt = addAttribute(FlagsAttribute.class); - p_termAtt = prefix.addAttribute(TermAttribute.class); + p_termAtt = prefix.addAttribute(CharTermAttribute.class); p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class); p_payloadAtt = prefix.addAttribute(PayloadAttribute.class); p_offsetAtt = prefix.addAttribute(OffsetAttribute.class); @@ -115,7 +115,7 @@ private void setCurrentToken(Token token) { if (token == null) return; clearAttributes(); - termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); + termAtt.copyBuffer(token.buffer(), 0, token.length()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); @@ -125,7 +125,7 @@ private Token getNextPrefixInputToken(Token token) throws IOException { if (!prefix.incrementToken()) return null; - token.setTermBuffer(p_termAtt.termBuffer(), 0, 
p_termAtt.termLength()); + token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length()); token.setPositionIncrement(p_posIncrAtt.getPositionIncrement()); token.setFlags(p_flagsAtt.getFlags()); token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset()); @@ -136,7 +136,7 @@ private Token getNextSuffixInputToken(Token token) throws IOException { if (!suffix.incrementToken()) return null; - token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setPositionIncrement(posIncrAtt.getPositionIncrement()); token.setFlags(flagsAtt.getFlags()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); Index: modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link BrazilianStemmer}. @@ -41,10 +41,10 @@ /** * {@link BrazilianStemmer} in use by this filter. 
*/ - private BrazilianStemmer stemmer = null; + private BrazilianStemmer stemmer = new BrazilianStemmer(); private Set exclusions = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); /** * Creates a new BrazilianStemFilter @@ -53,9 +53,6 @@ */ public BrazilianStemFilter(TokenStream in) { super(in); - stemmer = new BrazilianStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** @@ -74,13 +71,13 @@ @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final String term = termAtt.term(); + final String term = termAtt.toString(); // Check the exclusion table. if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) { final String s = stemmer.stem(term); // If not stemmed, don't waste the time adjusting the token. 
if ((s != null) && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (working copy) @@ -20,7 +20,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.IOException; @@ -72,8 +72,8 @@ private int curGramSize; private int tokStart; - private final TermAttribute termAtt; - private final OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range @@ -101,8 +101,6 @@ this.minGram = minGram; this.maxGram = maxGram; this.side = side; - this.termAtt = addAttribute(TermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); } /** @@ -124,8 +122,8 @@ if (!input.incrementToken()) { return false; } else { - curTermBuffer = termAtt.termBuffer().clone(); - curTermLength = termAtt.termLength(); + curTermBuffer = termAtt.buffer().clone(); + curTermLength = termAtt.length(); curGramSize = minGram; tokStart = offsetAtt.startOffset(); } @@ -138,7 +136,7 @@ int end = start + curGramSize; clearAttributes(); offsetAtt.setOffset(tokStart + start, tokStart + end); - termAtt.setTermBuffer(curTermBuffer, start, curGramSize); + termAtt.copyBuffer(curTermBuffer, start, 
curGramSize); curGramSize++; return true; } Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Tokenizes the input into n-grams of the given size(s). @@ -39,8 +39,8 @@ private int curPos; private int tokStart; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Creates NGramTokenFilter with given min and max n-grams. 
@@ -58,9 +58,6 @@ } this.minGram = minGram; this.maxGram = maxGram; - - this.termAtt = addAttribute(TermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); } /** @@ -79,8 +76,8 @@ if (!input.incrementToken()) { return false; } else { - curTermBuffer = termAtt.termBuffer().clone(); - curTermLength = termAtt.termLength(); + curTermBuffer = termAtt.buffer().clone(); + curTermLength = termAtt.length(); curGramSize = minGram; curPos = 0; tokStart = offsetAtt.startOffset(); @@ -89,7 +86,7 @@ while (curGramSize <= maxGram) { while (curPos+curGramSize <= curTermLength) { // while there is input clearAttributes(); - termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize); + termAtt.copyBuffer(curTermBuffer, curPos, curGramSize); offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize); curPos++; return true; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.IOException; import java.util.HashSet; @@ -51,17 +51,14 @@ /** * The actual token in the input stream. 
*/ - private FrenchStemmer stemmer = null; + private FrenchStemmer stemmer = new FrenchStemmer(); private Set exclusions = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public FrenchStemFilter( TokenStream in ) { - super(in); - stemmer = new FrenchStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); + super(in); } /** @@ -82,14 +79,14 @@ @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - String term = termAtt.term(); + String term = termAtt.toString(); // Check the exclusion table if ( !keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains( term )) ) { String s = stemmer.stem( term ); // If not stemmed, don't waste the time adjusting the token. if ((s != null) && !s.equals( term ) ) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -37,7 +37,7 @@ */ public final class ElisionFilter extends TokenFilter { private CharArraySet articles = 
CharArraySet.EMPTY_SET; - private final TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( "l", "m", "t", "qu", "n", "s", "j"), true)); @@ -100,7 +100,6 @@ super(input); this.articles = CharArraySet.unmodifiableSet( new CharArraySet(matchVersion, articles, true)); - termAtt = addAttribute(TermAttribute.class); } /** @@ -115,13 +114,13 @@ } /** - * Increments the {@link TokenStream} with a {@link TermAttribute} without elisioned start + * Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */ @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - char[] termBuffer = termAtt.termBuffer(); - int termLength = termAtt.termLength(); + char[] termBuffer = termAtt.buffer(); + int termLength = termAtt.length(); int minPoz = Integer.MAX_VALUE; for (int i = 0; i < apostrophes.length; i++) { @@ -137,8 +136,8 @@ // An apostrophe has been found. If the prefix is an article strip it off. 
if (minPoz != Integer.MAX_VALUE - && articles.contains(termAtt.termBuffer(), 0, minPoz)) { - termAtt.setTermBuffer(termAtt.termBuffer(), minPoz + 1, termAtt.termLength() - (minPoz + 1)); + && articles.contains(termAtt.buffer(), 0, minPoz)) { + termAtt.copyBuffer(termAtt.buffer(), minPoz + 1, termAtt.length() - (minPoz + 1)); } return true; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that stems Dutch words. @@ -52,17 +52,14 @@ /** * The actual token in the input stream. */ - private DutchStemmer stemmer = null; + private DutchStemmer stemmer = new DutchStemmer(); private Set exclusions = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public DutchStemFilter(TokenStream _in) { super(_in); - stemmer = new DutchStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** @@ -99,14 +96,14 @@ @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final String term = termAtt.term(); + final String term = termAtt.toString(); // Check the exclusion table. 
if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) { final String s = stemmer.stem(term); // If not stemmed, don't waste the time adjusting the token. if ((s != null) && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (working copy) @@ -19,7 +19,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Version; import java.io.IOException; @@ -42,7 +42,7 @@ */ public final class ReverseStringFilter extends TokenFilter { - private TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final char marker; private final Version matchVersion; private static final char NOMARKER = '\uFFFF'; @@ -131,20 +131,19 @@ super(in); this.matchVersion = matchVersion; this.marker = marker; - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - int len = termAtt.termLength(); + int len = termAtt.length(); if (marker != NOMARKER) { len++; - termAtt.resizeTermBuffer(len); - termAtt.termBuffer()[len - 1] = marker; + termAtt.resizeBuffer(len); + termAtt.buffer()[len - 1] = marker; } - reverse( matchVersion, termAtt.termBuffer(), 0, len ); - termAtt.setTermLength(len); + reverse( matchVersion, termAtt.buffer(), 0, len ); + termAtt.setLength(len); return true; } else { return false; 
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** @@ -39,15 +39,13 @@ public final class DelimitedPayloadTokenFilter extends TokenFilter { public static final char DEFAULT_DELIMITER = '|'; private final char delimiter; - private final TermAttribute termAtt; - private final PayloadAttribute payAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class); private final PayloadEncoder encoder; public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) { super(input); - termAtt = addAttribute(TermAttribute.class); - payAtt = addAttribute(PayloadAttribute.class); this.delimiter = delimiter; this.encoder = encoder; } @@ -55,12 +53,12 @@ @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final char[] buffer = termAtt.termBuffer(); - final int length = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + final int length = termAtt.length(); for (int i = 0; i < length; i++) { if (buffer[i] == delimiter) { payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1)))); - termAtt.setTermLength(i); // simply set a new length + termAtt.setLength(i); // simply set a new length return true; } } Index: 
modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (working copy) @@ -35,16 +35,14 @@ private String typeMatch; private Payload thePayload; - private PayloadAttribute payloadAtt; - private TypeAttribute typeAtt; + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) { super(input); //Need to encode the payload thePayload = new Payload(PayloadHelper.encodeFloat(payload)); this.typeMatch = typeMatch; - payloadAtt = addAttribute(PayloadAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override Index: modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (working copy) @@ -33,13 +33,11 @@ * **/ public class TypeAsPayloadTokenFilter extends TokenFilter { - private PayloadAttribute payloadAtt; - private TypeAttribute typeAtt; + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public TypeAsPayloadTokenFilter(TokenStream input) { super(input); - payloadAtt = addAttribute(PayloadAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } Index: 
modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (working copy) @@ -33,13 +33,11 @@ * **/ public class TokenOffsetPayloadTokenFilter extends TokenFilter { - protected OffsetAttribute offsetAtt; - protected PayloadAttribute payAtt; + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class); public TokenOffsetPayloadTokenFilter(TokenStream input) { super(input); - offsetAtt = addAttribute(OffsetAttribute.class); - payAtt = addAttribute(PayloadAttribute.class); } @Override Index: modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link import org.tartarus.snowball.SnowballProgram; @@ -42,7 +42,7 @@ private final SnowballProgram stemmer; - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final 
KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public SnowballFilter(TokenStream input, SnowballProgram stemmer) { @@ -75,16 +75,16 @@ public final boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAttr.isKeyword()) { - char termBuffer[] = termAtt.termBuffer(); - final int length = termAtt.termLength(); + char termBuffer[] = termAtt.buffer(); + final int length = termAtt.length(); stemmer.setCurrent(termBuffer, length); stemmer.stem(); final char finalTerm[] = stemmer.getCurrentBuffer(); final int newLength = stemmer.getCurrentBufferLength(); if (finalTerm != termBuffer) - termAtt.setTermBuffer(finalTerm, 0, newLength); + termAtt.copyBuffer(finalTerm, 0, newLength); else - termAtt.setTermLength(newLength); + termAtt.setLength(newLength); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Normalizes Turkish token text to lower case. 
@@ -37,7 +37,7 @@ private static final int LATIN_SMALL_LETTER_I = '\u0069'; private static final int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131'; private static final int COMBINING_DOT_ABOVE = '\u0307'; - private final TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * Create a new TurkishLowerCaseFilter, that normalizes Turkish token text @@ -47,7 +47,6 @@ */ public TurkishLowerCaseFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); } @Override @@ -55,8 +54,8 @@ boolean iOrAfter = false; if (input.incrementToken()) { - final char[] buffer = termAtt.termBuffer(); - int length = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + int length = termAtt.length(); for (int i = 0; i < length;) { final int ch = Character.codePointAt(buffer, i); @@ -88,7 +87,7 @@ i += Character.toChars(Character.toLowerCase(ch), buffer, i); } - termAtt.setTermLength(length); + termAtt.setLength(length); return true; } else return false; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Normalizes token text to lower case. 
@@ -32,20 +32,19 @@ @Deprecated public final class RussianLowerCaseFilter extends TokenFilter { - private TermAttribute termAtt; + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public RussianLowerCaseFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); } @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - char[] chArray = termAtt.termBuffer(); - int chLen = termAtt.termLength(); + char[] chArray = termAtt.buffer(); + int chLen = termAtt.length(); for (int i = 0; i < chLen; i++) { chArray[i] = Character.toLowerCase(chArray[i]); Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.ru.RussianStemmer;//javadoc @link import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link @@ -51,17 +51,14 @@ /** * The actual token in the input stream. 
*/ - private RussianStemmer stemmer = null; + private RussianStemmer stemmer = new RussianStemmer(); - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public RussianStemFilter(TokenStream in) { super(in); - stemmer = new RussianStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** * Returns the next token in the stream, or null at EOS @@ -71,10 +68,10 @@ { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final String term = termAtt.term(); + final String term = termAtt.toString(); final String s = stemmer.stem(term); if (s != null && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java (working copy) @@ -21,7 +21,7 @@ import java.text.ParseException; import java.util.Date; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeSource; /** @@ -34,7 +34,7 @@ public static final String DATE_TYPE = "date"; protected DateFormat dateFormat; - protected TermAttribute termAtt; + protected CharTermAttribute termAtt; /** * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object. 
@@ -50,10 +50,10 @@ @Override public boolean accept(AttributeSource source) { if (termAtt == null) { - termAtt = source.addAttribute(TermAttribute.class); + termAtt = source.addAttribute(CharTermAttribute.class); } try { - Date date = dateFormat.parse(termAtt.term());//We don't care about the date, just that we can parse it as a date + Date date = dateFormat.parse(termAtt.toString());//We don't care about the date, just that we can parse it as a date if (date != null) { return true; } Index: modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (working copy) @@ -137,10 +137,10 @@ */ private boolean isOutputHere = false; - private final CharTermAttribute termAtt; - private final OffsetAttribute offsetAtt; - private final PositionIncrementAttribute posIncrAtt; - private final TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); /** @@ -155,10 +155,6 @@ super(input); setMaxShingleSize(maxShingleSize); setMinShingleSize(minShingleSize); - this.termAtt = addAttribute(CharTermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); - this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); - this.typeAtt = addAttribute(TypeAttribute.class); } /** Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java =================================================================== --- 
modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -61,21 +61,20 @@ private CharArraySet stopTable; - private TermAttribute termAtt; + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public ChineseFilter(TokenStream in) { super(in); stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false); - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { while (input.incrementToken()) { - char text[] = termAtt.termBuffer(); - int termLength = termAtt.termLength(); + char text[] = termAtt.buffer(); + int termLength = termAtt.length(); // why not key off token type here assuming ChineseTokenizer comes first? 
if (!stopTable.contains(text, 0, termLength)) { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (working copy) @@ -23,8 +23,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.AttributeSource; @@ -62,24 +62,16 @@ public ChineseTokenizer(Reader in) { super(in); - init(); } public ChineseTokenizer(AttributeSource source, Reader in) { super(source, in); - init(); } public ChineseTokenizer(AttributeFactory factory, Reader in) { super(factory, in); - init(); } - - private void init() { - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - } - + private int offset = 0, bufferIndex=0, dataLen=0; private final static int MAX_WORD_LEN = 255; private final static int IO_BUFFER_SIZE = 1024; @@ -90,8 +82,8 @@ private int length; private int start; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final void push(char c) { @@ -105,7 +97,7 @@ if (length>0) { //System.out.println(new String(buffer, 0, //length)); - termAtt.setTermBuffer(buffer, 0, length); + termAtt.copyBuffer(buffer, 0, length); offsetAtt.setOffset(correctOffset(start), correctOffset(start+length)); return true; } Index: 
modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.. @@ -35,24 +35,20 @@ * @see KeywordMarkerFilter */ public final class ArabicStemFilter extends TokenFilter { - - private final ArabicStemmer stemmer; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final ArabicStemmer stemmer = new ArabicStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public ArabicStemFilter(TokenStream input) { super(input); - stemmer = new ArabicStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); } return true; } else { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java =================================================================== --- 
modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography. @@ -29,21 +29,18 @@ */ public final class ArabicNormalizationFilter extends TokenFilter { - - private final ArabicNormalizer normalizer; - private final TermAttribute termAtt; + private final ArabicNormalizer normalizer = new ArabicNormalizer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public ArabicNormalizationFilter(TokenStream input) { super(input); - normalizer = new ArabicNormalizer(); - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); return true; } return false; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (working copy) @@ -35,7 +35,7 @@ /** The first token must have non-zero positionIncrement **/ private boolean firstTokenPositioned = false; - private PositionIncrementAttribute posIncrAtt; + private PositionIncrementAttribute posIncrAtt = 
addAttribute(PositionIncrementAttribute.class); /** * Constructs a PositionFilter that assigns a position increment of zero to @@ -45,7 +45,6 @@ */ public PositionFilter(final TokenStream input) { super(input); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); } /** Index: modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java (working copy) @@ -21,14 +21,14 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link IndicNormalizer} to normalize text * in Indian Languages. 
*/ public final class IndicNormalizationFilter extends TokenFilter { - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final IndicNormalizer normalizer = new IndicNormalizer(); public IndicNormalizationFilter(TokenStream input) { @@ -38,7 +38,7 @@ @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(), termAtt.termLength())); + termAtt.setLength(normalizer.normalize(termAtt.buffer(), termAtt.length())); return true; } else { return false; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (working copy) @@ -18,10 +18,10 @@ package org.apache.lucene.analysis.wikipedia; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -116,11 +116,11 @@ private Set untokenizedTypes = Collections.emptySet(); private Iterator tokens = null; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; - private PositionIncrementAttribute posIncrAtt; - private TermAttribute termAtt; - private FlagsAttribute flagsAtt; + private final OffsetAttribute offsetAtt = 
addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); /** * Creates a new instance of the {@link WikipediaTokenizer}. Attaches the @@ -176,12 +176,7 @@ private void init(int tokenOutput, Set untokenizedTypes) { this.tokenOutput = tokenOutput; - this.untokenizedTypes = untokenizedTypes; - this.offsetAtt = addAttribute(OffsetAttribute.class); - this.typeAtt = addAttribute(TypeAttribute.class); - this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); - this.termAtt = addAttribute(TermAttribute.class); - this.flagsAtt = addAttribute(FlagsAttribute.class); + this.untokenizedTypes = untokenizedTypes; } /* @@ -245,8 +240,9 @@ lastPos = currPos + numAdded; } //trim the buffer + // TODO: this is inefficient String s = buffer.toString().trim(); - termAtt.setTermBuffer(s.toCharArray(), 0, s.length()); + termAtt.setEmpty().append(s); offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length())); flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG); //The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos @@ -283,8 +279,9 @@ lastPos = currPos + numAdded; } //trim the buffer + // TODO: this is inefficient String s = buffer.toString().trim(); - termAtt.setTermBuffer(s.toCharArray(), 0, s.length()); + termAtt.setEmpty().append(s); offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length())); flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG); //The way the loop is written, we will have proceeded to the next token. 
We need to pushback the scanner to lastPos @@ -298,7 +295,7 @@ private void setupToken() { scanner.getText(termAtt); final int start = scanner.yychar(); - offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.termLength())); + offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length())); } /* Index: modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (working copy) @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:51 */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/31/10 3:11 PM */ package org.apache.lucene.analysis.wikipedia; @@ -19,14 +19,14 @@ * limitations under the License. 
*/ -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 17.05.10 14:51 from the specification file - * C:/Users/Uwe Schindler/Projects/lucene/newtrunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex + * on 5/31/10 3:11 PM from the specification file + * C:/Users/rmuir/workspace/solrcene/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex */ class WikipediaTokenizerImpl { @@ -37,16 +37,16 @@ private static final int ZZ_BUFFERSIZE = 16384; /** lexical states */ - public static final int CATEGORY_STATE = 2; + public static final int THREE_SINGLE_QUOTES_STATE = 10; + public static final int EXTERNAL_LINK_STATE = 6; public static final int DOUBLE_EQUALS_STATE = 14; - public static final int EXTERNAL_LINK_STATE = 6; public static final int INTERNAL_LINK_STATE = 4; public static final int DOUBLE_BRACE_STATE = 16; + public static final int CATEGORY_STATE = 2; + public static final int YYINITIAL = 0; + public static final int STRING = 18; public static final int FIVE_SINGLE_QUOTES_STATE = 12; - public static final int STRING = 18; public static final int TWO_SINGLE_QUOTES_STATE = 8; - public static final int YYINITIAL = 0; - public static final int THREE_SINGLE_QUOTES_STATE = 10; /** * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l @@ -487,8 +487,8 @@ /** * Fills Lucene token with the current token text. */ -final void getText(TermAttribute t) { - t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); +final void getText(CharTermAttribute t) { + t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); } final int setText(StringBuilder buffer){ @@ -803,184 +803,184 @@ zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? 
zzAction : ZZ_ACTION[zzAction]) { - case 25: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE); + case 16: + { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType; } case 46: break; - case 30: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/ + case 39: + { positionInc = 1; return ACRONYM; } case 47: break; - case 41: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/ + case 8: + { /* ignore */ } case 48: break; - case 14: - { yybegin(STRING); numWikiTokensSeen++; return currentTokType; + case 20: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); } case 49: break; - case 23: - { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE); + case 35: + { positionInc = 1; return COMPANY; } case 50: break; - case 34: - { positionInc = 1; return NUM; + case 4: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE); } case 51: break; - case 18: - { /* ignore STRING */ + case 25: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE); } case 52: break; - case 12: - { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/ + case 43: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE); } case 53: break; - case 37: - { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/ + case 22: + { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;} } case 54: break; - case 31: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); + case 34: + { positionInc = 1; return NUM; } case 55: break; - case 10: - { numLinkToks = 0; 
positionInc = 0; yybegin(YYINITIAL); + case 32: + { positionInc = 1; return APOSTROPHE; } case 56: break; - case 38: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/ + case 23: + { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE); } case 57: break; - case 19: - { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/ + case 21: + { yybegin(STRING); return currentTokType;/*pipe*/ } case 58: break; - case 11: - { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); + case 2: + { positionInc = 1; return ALPHANUM; } case 59: break; - case 1: - { numWikiTokensSeen = 0; positionInc = 1; + case 29: + { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); } case 60: break; - case 33: - { positionInc = 1; return HOST; + case 17: + { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType; } case 61: break; - case 3: - { positionInc = 1; return CJ; + case 44: + { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); } case 62: break; - case 17: - { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType; + case 26: + { yybegin(YYINITIAL); } case 63: break; - case 32: - { positionInc = 1; return APOSTROPHE; + case 3: + { positionInc = 1; return CJ; } case 64: break; - case 8: - { /* ignore */ + case 38: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/ } case 65: break; - case 4: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE); + case 15: + { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); } case 66: break; - case 2: - { positionInc = 1; return ALPHANUM; + case 30: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/ } case 67: break; - case 26: - { yybegin(YYINITIAL); + case 6: + { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType; } case 
68: break; - case 43: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE); + case 5: + { positionInc = 1; } case 69: break; - case 36: - { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); + case 19: + { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/ } case 70: break; - case 13: - { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); + case 42: + { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType; } case 71: break; - case 24: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE); - } - case 72: break; case 27: { numLinkToks = 0; yybegin(YYINITIAL); } + case 72: break; + case 11: + { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); + } case 73: break; - case 15: - { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); + case 13: + { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); } case 74: break; - case 28: - { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); + case 14: + { yybegin(STRING); numWikiTokensSeen++; return currentTokType; } case 75: break; - case 39: - { positionInc = 1; return ACRONYM; + case 45: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); } case 76: break; - case 29: - { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); + case 28: + { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); } case 77: break; - case 7: - { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType; + case 37: + { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/ } case 78: break; - case 16: - { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType; + case 9: + { 
if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType; } case 79: break; - case 20: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); + case 7: + { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType; } case 80: break; - case 35: - { positionInc = 1; return COMPANY; + case 24: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE); } case 81: break; case 40: { positionInc = 1; return EMAIL; } case 82: break; - case 42: - { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType; + case 1: + { numWikiTokensSeen = 0; positionInc = 1; } case 83: break; - case 6: - { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType; + case 18: + { /* ignore STRING */ } case 84: break; - case 44: - { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); + case 36: + { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); } case 85: break; - case 5: - { positionInc = 1; + case 33: + { positionInc = 1; return HOST; } case 86: break; - case 9: - { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType; + case 31: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); } case 87: break; - case 45: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); + case 41: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/ } case 88: break; - case 22: - { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;} + case 12: + { currentTokType = 
ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/ } case 89: break; - case 21: - { yybegin(STRING); return currentTokType;/*pipe*/ + case 10: + { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); } case 90: break; default: Index: modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (working copy) @@ -17,7 +17,7 @@ * limitations under the License. */ -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %% @@ -81,8 +81,8 @@ /** * Fills Lucene token with the current token text. */ -final void getText(TermAttribute t) { - t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); +final void getText(CharTermAttribute t) { + t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); } final int setText(StringBuilder buffer){ Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -98,9 +98,9 @@ */ private boolean preIsTokened = false; - 
private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); //~ Constructors ----------------------------------------------------------- @@ -111,25 +111,16 @@ */ public CJKTokenizer(Reader in) { super(in); - init(); } public CJKTokenizer(AttributeSource source, Reader in) { super(source, in); - init(); } public CJKTokenizer(AttributeFactory factory, Reader in) { super(factory, in); - init(); } - private void init() { - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - } - //~ Methods ---------------------------------------------------------------- /** @@ -287,7 +278,7 @@ } if (length > 0) { - termAtt.setTermBuffer(buffer, 0, length); + termAtt.copyBuffer(buffer, 0, length); offsetAtt.setOffset(correctOffset(start), correctOffset(start+length)); typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]); return true; Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (revision 948225) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (working copy) @@ -6,7 +6,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -37,23 +37,20 @@ * @see KeywordMarkerFilter */ 
public final class CzechStemFilter extends TokenFilter { - private final CzechStemmer stemmer; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CzechStemmer stemmer = new CzechStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public CzechStemFilter(TokenStream input) { super(input); - stemmer = new CzechStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); } return true; } else { Index: lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java =================================================================== --- lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java (revision 948225) +++ lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -125,10 +125,10 @@ // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.tokenStream( field, new StringReader( query)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { 
- String word = termAtt.term(); + String word = termAtt.toString(); if ( already.add( word)) top.add( word); } Index: lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java =================================================================== --- lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java (revision 948225) +++ lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java (working copy) @@ -21,8 +21,8 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -45,9 +45,9 @@ private AttributeSource.State current = null; private int todo = 0; - private TermAttribute termAtt; - private TypeAttribute typeAtt; - private PositionIncrementAttribute posIncrAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); /** * Creates an instance for the given underlying stream and synonym table. @@ -71,10 +71,6 @@ this.synonyms = synonyms; this.maxSynonyms = maxSynonyms; - - this.termAtt = addAttribute(TermAttribute.class); - this.typeAtt = addAttribute(TypeAttribute.class); - this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); } /** Returns the next token in the stream, or null at EOS. 
*/ @@ -89,7 +85,7 @@ if (!input.incrementToken()) return false; // EOS; iterator exhausted - stack = synonyms.getSynonyms(termAtt.term()); // push onto stack + stack = synonyms.getSynonyms(termAtt.toString()); // push onto stack if (stack.length > maxSynonyms) randomize(stack); index = 0; current = captureState(); @@ -110,7 +106,7 @@ */ protected boolean createToken(String synonym, AttributeSource.State current) { restoreState(current); - termAtt.setTermBuffer(synonym); + termAtt.setEmpty().append(synonym); typeAtt.setType(SYNONYM_TOKEN_TYPE); posIncrAtt.setPositionIncrement(0); return true; Index: lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java =================================================================== --- lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java (revision 948225) +++ lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java (working copy) @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -117,10 +117,10 @@ // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.tokenStream( field, new StringReader( query)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - String word = termAtt.term(); + String word = termAtt.toString(); if ( already.add( word)) top.add( word); } Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java =================================================================== --- 
lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (revision 948225) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (working copy) @@ -37,8 +37,8 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; @@ -96,8 +96,8 @@ public static Analyzer qpAnalyzer = new QPTestAnalyzer(); public static final class QPTestFilter extends TokenFilter { - TermAttribute termAtt; - OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Filter which discards the token 'stop' and which expands the token @@ -105,8 +105,6 @@ */ public QPTestFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); } boolean inPhrase = false; @@ -117,19 +115,19 @@ if (inPhrase) { inPhrase = false; clearAttributes(); - termAtt.setTermBuffer("phrase2"); + termAtt.setEmpty().append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else while (input.incrementToken()) { - if (termAtt.term().equals("phrase")) { + if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); savedEnd = offsetAtt.endOffset(); - termAtt.setTermBuffer("phrase1"); + termAtt.setEmpty().append("phrase1"); offsetAtt.setOffset(savedStart, savedEnd); return true; - } else if (!termAtt.term().equals("stop")) + } else if 
(!termAtt.toString().equals("stop")) return true; } return false; @@ -1158,7 +1156,7 @@ private class CannedTokenStream extends TokenStream { private int upto = 0; final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class); - final TermAttribute term = addAttribute(TermAttribute.class); + final CharTermAttribute term = addAttribute(CharTermAttribute.class); @Override public boolean incrementToken() { clearAttributes(); @@ -1167,16 +1165,16 @@ } if (upto == 0) { posIncr.setPositionIncrement(1); - term.setTermBuffer("a"); + term.setEmpty().append("a"); } else if (upto == 1) { posIncr.setPositionIncrement(1); - term.setTermBuffer("b"); + term.setEmpty().append("b"); } else if (upto == 2) { posIncr.setPositionIncrement(0); - term.setTermBuffer("c"); + term.setEmpty().append("c"); } else { posIncr.setPositionIncrement(0); - term.setTermBuffer("d"); + term.setEmpty().append("d"); } upto++; return true; Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java (revision 948225) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java (working copy) @@ -23,9 +23,9 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.queryParser.core.QueryNodeException; import 
org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator; @@ -163,24 +163,19 @@ private int prevStartOffset; private int prevEndOffset; - TermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; - OffsetAttribute offsetAtt; - TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public TestFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - } @Override public final boolean incrementToken() throws java.io.IOException { if (multiToken > 0) { - termAtt.setTermBuffer("multi" + (multiToken + 1)); + termAtt.setEmpty().append("multi" + (multiToken + 1)); offsetAtt.setOffset(prevStartOffset, prevEndOffset); typeAtt.setType(prevType); posIncrAtt.setPositionIncrement(0); @@ -194,7 +189,7 @@ prevType = typeAtt.type(); prevStartOffset = offsetAtt.startOffset(); prevEndOffset = offsetAtt.endOffset(); - String text = termAtt.term(); + String text = termAtt.toString(); if (text.equals("triplemulti")) { multiToken = 2; return true; @@ -228,21 +223,19 @@ private class TestPosIncrementFilter extends TokenFilter { - TermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); public TestPosIncrementFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); } @Override public final 
boolean incrementToken() throws java.io.IOException { while (input.incrementToken()) { - if (termAtt.term().equals("the")) { + if (termAtt.toString().equals("the")) { // stopword, do nothing - } else if (termAtt.term().equals("quick")) { + } else if (termAtt.toString().equals("quick")) { posIncrAtt.setPositionIncrement(2); return true; } else { Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java (revision 948225) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java (working copy) @@ -23,9 +23,9 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.util.LuceneTestCase; @@ -157,24 +157,19 @@ private int prevStartOffset; private int prevEndOffset; - TermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; - OffsetAttribute offsetAtt; - TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public TestFilter(TokenStream in) { super(in); - termAtt = 
addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - } @Override public final boolean incrementToken() throws java.io.IOException { if (multiToken > 0) { - termAtt.setTermBuffer("multi" + (multiToken + 1)); + termAtt.setEmpty().append("multi" + (multiToken + 1)); offsetAtt.setOffset(prevStartOffset, prevEndOffset); typeAtt.setType(prevType); posIncrAtt.setPositionIncrement(0); @@ -188,7 +183,7 @@ prevType = typeAtt.type(); prevStartOffset = offsetAtt.startOffset(); prevEndOffset = offsetAtt.endOffset(); - String text = termAtt.term(); + String text = termAtt.toString(); if (text.equals("triplemulti")) { multiToken = 2; return true; @@ -222,21 +217,19 @@ private class TestPosIncrementFilter extends TokenFilter { - TermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); public TestPosIncrementFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); } @Override public final boolean incrementToken() throws java.io.IOException { while (input.incrementToken()) { - if (termAtt.term().equals("the")) { + if (termAtt.toString().equals("the")) { // stopword, do nothing - } else if (termAtt.term().equals("quick")) { + } else if (termAtt.toString().equals("quick")) { posIncrAtt.setPositionIncrement(2); return true; } else { Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (revision 948225) +++ 
lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (working copy) @@ -36,7 +36,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; @@ -93,8 +93,8 @@ public static Analyzer qpAnalyzer = new QPTestAnalyzer(); public static final class QPTestFilter extends TokenFilter { - TermAttribute termAtt; - OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Filter which discards the token 'stop' and which expands the token @@ -102,8 +102,6 @@ */ public QPTestFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); } boolean inPhrase = false; @@ -114,19 +112,19 @@ if (inPhrase) { inPhrase = false; clearAttributes(); - termAtt.setTermBuffer("phrase2"); + termAtt.setEmpty().append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else while (input.incrementToken()) { - if (termAtt.term().equals("phrase")) { + if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); savedEnd = offsetAtt.endOffset(); - termAtt.setTermBuffer("phrase1"); + termAtt.setEmpty().append("phrase1"); offsetAtt.setOffset(savedStart, savedEnd); return true; - } else if (!termAtt.term().equals("stop")) + } else if (!termAtt.toString().equals("stop")) return true; } return false; Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java 
=================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (revision 948225) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (working copy) @@ -23,8 +23,8 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.DateTools; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; @@ -68,7 +68,7 @@ boolean inPhrase = false; int savedStart = 0, savedEnd = 0; - TermAttribute termAtt = addAttribute(TermAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @Override @@ -76,19 +76,19 @@ clearAttributes(); if (inPhrase) { inPhrase = false; - termAtt.setTermBuffer("phrase2"); + termAtt.setEmpty().append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else while(input.incrementToken()) - if (termAtt.term().equals("phrase")) { + if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); savedEnd = offsetAtt.endOffset(); - termAtt.setTermBuffer("phrase1"); + termAtt.setEmpty().append("phrase1"); offsetAtt.setOffset(savedStart, savedEnd); return true; - } else if (!termAtt.term().equals("stop")) + } else if (!termAtt.toString().equals("stop")) return true; return false; } Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java =================================================================== --- 
lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java (revision 948225) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.Query; import org.apache.lucene.util.Version; @@ -107,7 +107,7 @@ // get Analyzer from superclass and tokenize the term TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); - TermAttribute termAtt = source.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); int countTokens = 0; while (true) { @@ -116,7 +116,7 @@ } catch (IOException e) { break; } - String term = termAtt.term(); + String term = termAtt.toString(); if (!"".equals(term)) { try { tlist.set(countTokens++, term); @@ -190,7 +190,7 @@ // get Analyzer from superclass and tokenize the term TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); List tlist = new ArrayList(); - TermAttribute termAtt = source.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); while (true) { try { @@ -198,7 +198,7 @@ } catch (IOException e) { break; } - tlist.add(termAtt.term()); + tlist.add(termAtt.toString()); } try { @@ -237,13 +237,13 @@ throws ParseException { // get Analyzer from superclass and tokenize the term TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); - TermAttribute termAtt = source.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); String nextToken = null; boolean multipleTokens = false; 
try { if (source.incrementToken()) { - nextToken = termAtt.term(); + nextToken = termAtt.toString(); } multipleTokens = source.incrementToken(); } catch (IOException e) { @@ -273,13 +273,13 @@ throws ParseException { // get Analyzer from superclass and tokenize the terms TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1)); - TermAttribute termAtt = source.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); boolean multipleTokens = false; // part1 try { if (source.incrementToken()) { - part1 = termAtt.term(); + part1 = termAtt.toString(); } multipleTokens = source.incrementToken(); } catch (IOException e) { @@ -297,11 +297,11 @@ // part2 source = getAnalyzer().tokenStream(field, new StringReader(part2)); - termAtt = source.addAttribute(TermAttribute.class); + termAtt = source.addAttribute(CharTermAttribute.class); try { if (source.incrementToken()) { - part2 = termAtt.term(); + part2 = termAtt.toString(); } multipleTokens = source.incrementToken(); } catch (IOException e) { Index: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 948225) +++ lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -26,9 +26,10 @@ import java.util.Locale; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import 
org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker; @@ -918,11 +919,11 @@ TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text)); ts1.reset(); ts2.reset(); - TermAttribute termAtt1 = ts1.addAttribute(TermAttribute.class); - TermAttribute termAtt2 = ts2.addAttribute(TermAttribute.class); + CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class); + CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class); assertTrue(ts1.incrementToken()); assertTrue(ts2.incrementToken()); - assertEquals(termAtt1.term(), termAtt2.term()); + assertEquals(termAtt1.toString(), termAtt2.toString()); assertFalse(ts1.incrementToken()); assertFalse(ts2.incrementToken()); ts1.close(); @@ -994,21 +995,7 @@ private void assertEqualShingle (Analyzer analyzer, String text, String[] expected) throws Exception { - TokenStream stream = analyzer.tokenStream("bogus", new StringReader(text)); - stream.reset(); - TermAttribute termAtt = stream.addAttribute(TermAttribute.class); - int termNum = 0; - while (stream.incrementToken()) { - assertTrue("Extra output term(s), starting with '" - + new String(termAtt.termBuffer(), 0, termAtt.termLength()) + "'", - termNum < expected.length); - assertEquals("Mismatch in output term # " + termNum + " - ", - expected[termNum], - new String(termAtt.termBuffer(), 0, termAtt.termLength())); - ++termNum; - } - assertEquals("Too few output terms", expected.length, termNum); - stream.close(); + BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected); } private String[] getShingleConfig(String params) { Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java =================================================================== --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java (revision 948225) +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java (working 
copy) @@ -5,7 +5,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; import org.apache.lucene.search.TermsFilter; @@ -57,7 +57,7 @@ String text = DOMUtils.getNonBlankTextOrFail(e); String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"); TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); try { @@ -65,11 +65,11 @@ while (ts.incrementToken()) { if (term == null) { - term = new Term(fieldName, termAtt.term()); + term = new Term(fieldName, termAtt.toString()); } else { // create from previous to save fieldName.intern overhead - term = term.createTerm(termAtt.term()); + term = term.createTerm(termAtt.toString()); } tf.addTerm(term); } Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java =================================================================== --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java (revision 948225) +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java (working copy) @@ -10,7 +10,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.search.similar.MoreLikeThisQuery; import org.apache.lucene.search.Query; import org.apache.lucene.xmlparser.DOMUtils; @@ -77,11 +77,11 @@ for (int i = 0; i < fields.length; i++) { TokenStream ts = 
analyzer.tokenStream(fields[i],new StringReader(stopWords)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); try { while(ts.incrementToken()) { - stopWordsSet.add(termAtt.term()); + stopWordsSet.add(termAtt.toString()); } } catch(IOException ioe) Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java =================================================================== --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java (revision 948225) +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java (working copy) @@ -6,7 +6,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; @@ -56,10 +56,10 @@ { ArrayList clausesList=new ArrayList(); TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.term())); + SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.toString())); clausesList.add(stq); } SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()])); Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java =================================================================== --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java (revision 
948225) +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java (working copy) @@ -5,7 +5,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -57,16 +57,16 @@ TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); try { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); Term term = null; while (ts.incrementToken()) { if (term == null) { - term = new Term(fieldName, termAtt.term()); + term = new Term(fieldName, termAtt.toString()); } else { // create from previous to save fieldName.intern overhead - term = term.createTerm(termAtt.term()); + term = term.createTerm(termAtt.toString()); } bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD)); } Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java (revision 948225) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import 
org.apache.lucene.document.Field.Index; @@ -296,16 +296,11 @@ private int i = -1; - private TermAttribute termAttribute; + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); - private OffsetAttribute offsetAttribute; - - private PositionIncrementAttribute positionIncrementAttribute; - public TokenStreamSparse() { - termAttribute = addAttribute(TermAttribute.class); - offsetAttribute = addAttribute(OffsetAttribute.class); - positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); reset(); } @@ -316,8 +311,7 @@ return false; } clearAttributes(); - termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i] - .term().length()); + termAttribute.setEmpty().append(this.tokens[i].term()); offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i] .endOffset()); positionIncrementAttribute.setPositionIncrement(this.tokens[i] @@ -342,16 +336,11 @@ private int i = -1; - private TermAttribute termAttribute; + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); - private OffsetAttribute offsetAttribute; - - private PositionIncrementAttribute positionIncrementAttribute; - public TokenStreamConcurrent() { - termAttribute = addAttribute(TermAttribute.class); - offsetAttribute = addAttribute(OffsetAttribute.class); - positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); reset(); } @@ -362,8 +351,7 @@ return false; } clearAttributes(); - termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i] - .term().length()); + 
termAttribute.setEmpty().append(this.tokens[i].term()); offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i] .endOffset()); positionIncrementAttribute.setPositionIncrement(this.tokens[i] Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 948225) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -41,7 +41,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; @@ -1424,13 +1424,10 @@ return new TokenStream() { Iterator iter; List lst; - private TermAttribute termAtt; - private PositionIncrementAttribute posIncrAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); { - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); lst = new ArrayList(); Token t; t = createToken("hi", 0, 2); @@ -1456,7 +1453,7 @@ if(iter.hasNext()) { Token token = iter.next(); clearAttributes(); - termAtt.setTermBuffer(token.term()); + termAtt.setEmpty().append(token.term()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); 
offsetAtt.setOffset(token.startOffset(), token.endOffset()); return true; @@ -1473,13 +1470,10 @@ return new TokenStream() { Iterator iter; List lst; - private TermAttribute termAtt; - private PositionIncrementAttribute posIncrAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); { - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); lst = new ArrayList(); Token t; t = createToken("hispeed", 0, 8); @@ -1505,7 +1499,7 @@ if(iter.hasNext()) { Token token = iter.next(); clearAttributes(); - termAtt.setTermBuffer(token.term()); + termAtt.setEmpty().append(token.term()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); return true; @@ -1795,7 +1789,7 @@ @Override public TokenStream tokenStream(String arg0, Reader arg1) { Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true); - stream.addAttribute(TermAttribute.class); + stream.addAttribute(CharTermAttribute.class); stream.addAttribute(PositionIncrementAttribute.class); stream.addAttribute(OffsetAttribute.class); return new SynonymTokenizer(stream, synonyms); @@ -1811,21 +1805,21 @@ private Token currentRealToken = null; private Map synonyms; StringTokenizer st = null; - private TermAttribute realTermAtt; + private CharTermAttribute realTermAtt; private PositionIncrementAttribute realPosIncrAtt; private OffsetAttribute realOffsetAtt; - private TermAttribute termAtt; + private CharTermAttribute termAtt; private PositionIncrementAttribute posIncrAtt; private OffsetAttribute offsetAtt; public SynonymTokenizer(TokenStream realStream, Map synonyms) { this.realStream = 
realStream; this.synonyms = synonyms; - realTermAtt = realStream.addAttribute(TermAttribute.class); + realTermAtt = realStream.addAttribute(CharTermAttribute.class); realPosIncrAtt = realStream.addAttribute(PositionIncrementAttribute.class); realOffsetAtt = realStream.addAttribute(OffsetAttribute.class); - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); } @@ -1840,25 +1834,25 @@ } //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset()); clearAttributes(); - termAtt.setTermBuffer(realTermAtt.term()); + termAtt.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length()); offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset()); posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement()); - String expansions = synonyms.get(realTermAtt.term()); + String expansions = synonyms.get(realTermAtt.toString()); if (expansions == null) { return true; } st = new StringTokenizer(expansions, ","); if (st.hasMoreTokens()) { currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset()); - currentRealToken.setTermBuffer(realTermAtt.term()); + currentRealToken.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length()); } return true; } else { String tok = st.nextToken(); clearAttributes(); - termAtt.setTermBuffer(tok); + termAtt.setEmpty().append(tok); offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset()); posIncrAtt.setPositionIncrement(0); if (!st.hasMoreTokens()) { Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (revision 948225) +++ 
lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (working copy) @@ -26,8 +26,8 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; @@ -221,14 +221,14 @@ ch = 0; } - TermAttribute termAtt = addAttribute(TermAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @Override public boolean incrementToken() throws IOException { if( !getNextPartialSnippet() ) return false; clearAttributes(); - termAtt.setTermBuffer(snippet, startTerm, lenTerm); + termAtt.setEmpty().append(snippet, startTerm, startTerm + lenTerm); offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm)); return true; } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java (revision 948225) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java (working copy) @@ -21,7 +21,7 @@ import java.util.HashSet; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Query; @@ -41,7 +41,7 @@ float maxTermWeight = 0; private HashMap termsToFind; - private TermAttribute termAtt; + 
private CharTermAttribute termAtt; /** * @@ -95,7 +95,7 @@ * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream) */ public TokenStream init(TokenStream tokenStream) { - termAtt = tokenStream.addAttribute(TermAttribute.class); + termAtt = tokenStream.addAttribute(CharTermAttribute.class); return null; } @@ -118,7 +118,7 @@ * @see org.apache.lucene.search.highlight.Scorer#getTokenScore() */ public float getTokenScore() { - String termText = termAtt.term(); + String termText = termAtt.toString(); WeightedTerm queryTerm = termsToFind.get(termText); if (queryTerm == null) { Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (revision 948225) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (working copy) @@ -25,9 +25,9 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermVectorOffsetInfo; @@ -37,7 +37,7 @@ private Iterator tokensAtCurrentPosition; - private TermAttribute termAttribute; + private CharTermAttribute termAttribute; private PositionIncrementAttribute positionIncrementAttribute; @@ -51,7 +51,7 @@ */ public TokenStreamFromTermPositionVector( final TermPositionVector termPositionVector) { - termAttribute = addAttribute(TermAttribute.class); + termAttribute = addAttribute(CharTermAttribute.class); 
positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); offsetAttribute = addAttribute(OffsetAttribute.class); final String[] terms = termPositionVector.getTerms(); @@ -100,7 +100,7 @@ if (this.tokensAtCurrentPosition.hasNext()) { final Token next = this.tokensAtCurrentPosition.next(); clearAttributes(); - termAttribute.setTermBuffer(next.term()); + termAttribute.setEmpty().append(next.term()); positionIncrementAttribute.setPositionIncrement(next .getPositionIncrement()); offsetAttribute.setOffset(next.startOffset(), next.endOffset()); Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (revision 948225) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (working copy) @@ -25,8 +25,8 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.search.Query; @@ -46,7 +46,7 @@ private float maxTermWeight; private int position = -1; private String defaultField; - private TermAttribute termAtt; + private CharTermAttribute termAtt; private PositionIncrementAttribute posIncAtt; private boolean expandMultiTermQuery = true; private Query query; @@ -145,7 +145,7 @@ */ public float getTokenScore() { position += posIncAtt.getPositionIncrement(); - String termText = termAtt.term(); + String termText = termAtt.toString(); WeightedSpanTerm weightedSpanTerm; @@ -175,7 +175,7 @@ */ public TokenStream init(TokenStream tokenStream) throws IOException { 
position = -1; - termAtt = tokenStream.addAttribute(TermAttribute.class); + termAtt = tokenStream.addAttribute(CharTermAttribute.class); posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); if(!skipInitExtractor) { if(fieldWeightedSpanTerms != null) { Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (revision 948225) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (working copy) @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; @@ -153,13 +154,13 @@ int currentToken = 0; - TermAttribute termAtt; + CharTermAttribute termAtt; OffsetAttribute offsetAtt; StoredTokenStream(Token tokens[]) { this.tokens = tokens; - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); } @@ -170,7 +171,7 @@ } Token token = tokens[currentToken++]; clearAttributes(); - termAtt.setTermBuffer(token.term()); + termAtt.setEmpty().append(token.term()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); return true; } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java (revision 948225) +++ 
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java (working copy) @@ -20,9 +20,9 @@ import java.util.List; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.search.spans.Spans; @@ -38,7 +38,7 @@ private QueryScorer queryScorer; private int waitForPos = -1; private int textSize; - private TermAttribute termAtt; + private CharTermAttribute termAtt; private PositionIncrementAttribute posIncAtt; private OffsetAttribute offsetAtt; @@ -70,7 +70,7 @@ return false; } - WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term()); + WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.toString()); if (wSpanTerm != null) { List positionSpans = wSpanTerm.getPositionSpans(); @@ -101,7 +101,7 @@ position = -1; currentNumFrags = 1; textSize = originalText.length(); - termAtt = tokenStream.addAttribute(TermAttribute.class); + termAtt = tokenStream.addAttribute(CharTermAttribute.class); posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java (revision 948225) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java (working copy) @@ -19,8 +19,8 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import 
org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; /** * One, or several overlapping tokens, along with the score(s) and the scope of @@ -38,11 +38,11 @@ int matchStartOffset, matchEndOffset; private OffsetAttribute offsetAtt; - private TermAttribute termAtt; + private CharTermAttribute termAtt; public TokenGroup(TokenStream tokenStream) { offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); - termAtt = tokenStream.addAttribute(TermAttribute.class); + termAtt = tokenStream.addAttribute(CharTermAttribute.class); } void addToken(float score) { @@ -68,7 +68,7 @@ } } Token token = new Token(termStartOffset, termEndOffset); - token.setTermBuffer(termAtt.term()); + token.setTermBuffer(termAtt.toString()); tokens[numTokens] = token; scores[numTokens] = score; numTokens++; Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (revision 948225) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (working copy) @@ -23,9 +23,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.PriorityQueue; /** @@ -191,7 +191,7 @@ ArrayList docFrags = new ArrayList(); StringBuilder newText=new StringBuilder(); - TermAttribute termAtt = tokenStream.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = 
tokenStream.addAttribute(OffsetAttribute.class); tokenStream.addAttribute(PositionIncrementAttribute.class); tokenStream.reset(); @@ -225,7 +225,7 @@ (offsetAtt.startOffset()>text.length()) ) { - throw new InvalidTokenOffsetsException("Token "+ termAtt.term() + throw new InvalidTokenOffsetsException("Token "+ termAtt.toString() +" exceeds length of provided text sized "+text.length()); } if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct())) Index: lucene/contrib/lucli/src/java/lucli/LuceneMethods.java =================================================================== --- lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (revision 948225) +++ lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (working copy) @@ -36,8 +36,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; @@ -303,14 +303,14 @@ int position = 0; // Tokenize field and add to postingTable TokenStream stream = analyzer.tokenStream(fieldName, reader); - TermAttribute termAtt = stream.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); try { while (stream.incrementToken()) { position += (posIncrAtt.getPositionIncrement() - 1); position++; - String name = termAtt.term(); + String name = termAtt.toString(); Integer Count = tokenMap.get(name); if (Count == null) { // not in there yet tokenMap.put(name, Integer.valueOf(1)); //first one Index: 
lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (revision 948225) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (working copy) @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -884,10 +884,10 @@ TokenStream ts = analyzer.tokenStream(fieldName, r); int tokenCount=0; // for every token - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - String word = termAtt.term(); + String word = termAtt.toString(); tokenCount++; if(tokenCount>maxNumTokensParsed) { Index: lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java (revision 948225) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -86,12 +86,12 @@ throws IOException { TokenStream ts = a.tokenStream( field, new StringReader( body)); - 
TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); BooleanQuery tmp = new BooleanQuery(); Set already = new HashSet(); // ignore dups while (ts.incrementToken()) { - String word = termAtt.term(); + String word = termAtt.toString(); // ignore opt stop words if ( stop != null && stop.contains( word)) continue; Index: lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (revision 948225) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; @@ -185,14 +185,14 @@ { if(f.queryString==null) return; TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); int corpusNumDocs=reader.numDocs(); Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects HashSet processedTerms=new HashSet(); while (ts.incrementToken()) { - String term = termAtt.term(); + String term = termAtt.toString(); if(!processedTerms.contains(term)) { processedTerms.add(term);