Index: modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
===================================================================
--- modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (revision 948225)
+++ modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (working copy)
@@ -21,8 +21,8 @@
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
 
@@ -44,31 +44,22 @@
 
   private int tokenStart = 0, tokenEnd = 0;
 
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   public SentenceTokenizer(Reader reader) {
     super(reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeSource source, Reader reader) {
     super(source, reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeFactory factory, Reader reader) {
     super(factory, reader);
-    init();
   }
 
-  private void init() {
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
-  }
-
   @Override
   public boolean incrementToken() throws IOException {
     clearAttributes();
@@ -112,7 +103,7 @@
     if (buffer.length() == 0)
       return false;
     else {
-      termAtt.setTermBuffer(buffer.toString());
+      termAtt.setEmpty().append(buffer);
       offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
       typeAtt.setType("sentence");
       return true;
Index: modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
===================================================================
--- modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (revision 948225)
+++ modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (working copy)
@@ -24,8 +24,8 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /**
@@ -40,9 +40,9 @@
 
   private List tokenBuffer;
 
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   /**
    * Construct a new WordTokenizer.
@@ -52,9 +52,6 @@
   public WordTokenFilter(TokenStream in) {
     super(in);
     this.wordSegmenter = new WordSegmenter();
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
 
   @Override
@@ -63,7 +60,7 @@
       // there are no remaining tokens from the current sentence... are there more sentences?
       if (input.incrementToken()) {
         // a new sentence is available: process it.
-        tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset());
+        tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset());
         tokenIter = tokenBuffer.iterator();
         /*
          * it should not be possible to have a sentence with 0 words, check just in case.
@@ -79,7 +76,7 @@
     clearAttributes();
     // There are remaining tokens from the current sentence, return the next one.
     SegToken nextWord = tokenIter.next();
-    termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
+    termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length);
     offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
     typeAtt.setType("word");
     return true;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (working copy)
@@ -30,7 +30,7 @@
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
@@ -69,12 +69,12 @@
   protected final int maxSubwordSize;
   protected final boolean onlyLongestMatch;
 
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private FlagsAttribute flagsAtt;
-  private PositionIncrementAttribute posIncAtt;
-  private TypeAttribute typeAtt;
-  private PayloadAttribute payloadAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
 
   private final Token wrapper = new Token();
   /**
@@ -160,13 +160,6 @@
       this.dictionary = new CharArraySet(matchVersion, dictionary.size(), false);
       addAllLowerCase(this.dictionary, dictionary);
     }
-
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    flagsAtt = addAttribute(FlagsAttribute.class);
-    posIncAtt = addAttribute(PositionIncrementAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
-    payloadAtt = addAttribute(PayloadAttribute.class);
   }
 
   /**
@@ -192,7 +185,7 @@
 
   private final void setToken(final Token token) throws IOException {
     clearAttributes();
-    termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+    termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
     flagsAtt.setFlags(token.getFlags());
     typeAtt.setType(token.type());
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -210,7 +203,7 @@
       if (!input.incrementToken())
         return false;
 
-      wrapper.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+      wrapper.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
       wrapper.setStartOffset(offsetAtt.startOffset());
       wrapper.setEndOffset(offsetAtt.endOffset());
       wrapper.setFlags(flagsAtt.getFlags());
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java (working copy)
@@ -21,7 +21,7 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the
@@ -30,22 +30,19 @@
  */
 
 public final class PersianNormalizationFilter extends TokenFilter {
+  private final PersianNormalizer normalizer = new PersianNormalizer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
-  private final PersianNormalizer normalizer;
-  private final TermAttribute termAtt;
-
   public PersianNormalizationFilter(TokenStream input) {
     super(input);
-    normalizer = new PersianNormalizer();
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      final int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt
-          .termLength());
-      termAtt.setTermLength(newlen);
+      final int newlen = normalizer.normalize(termAtt.buffer(),
+          termAtt.length());
+      termAtt.setLength(newlen);
       return true;
     }
     return false;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link BulgarianStemmer} to stem Bulgarian
@@ -35,23 +35,20 @@
  *
  */
 public final class BulgarianStemFilter extends TokenFilter {
-  private final BulgarianStemmer stemmer;
-  private final TermAttribute termAtt;
-  private final KeywordAttribute keywordAttr;
+  private final BulgarianStemmer stemmer = new BulgarianStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   public BulgarianStemFilter(final TokenStream input) {
     super(input);
-    stemmer = new BulgarianStemmer();
-    termAtt = addAttribute(TermAttribute.class);
-    keywordAttr = addAttribute(KeywordAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
       if(!keywordAttr.isKeyword()) {
-        final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength());
-        termAtt.setTermLength(newlen);
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
       }
       return true;
     } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (working copy)
@@ -24,7 +24,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that stems German words.
@@ -45,11 +45,11 @@
     /**
      * The actual token in the input stream.
      */
-    private GermanStemmer stemmer = null;
+    private GermanStemmer stemmer = new GermanStemmer();
     private Set exclusionSet = null;
 
-    private final TermAttribute termAtt;
-    private final KeywordAttribute keywordAttr;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
     /**
      * Creates a {@link GermanStemFilter} instance
      *
@@ -58,9 +58,6 @@
     public GermanStemFilter( TokenStream in )
     {
       super(in);
-      stemmer = new GermanStemmer();
-      termAtt = addAttribute(TermAttribute.class);
-      keywordAttr = addAttribute(KeywordAttribute.class);
     }
 
     /**
     *
@@ -80,13 +77,13 @@
     @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
-       String term = termAtt.term();
+       String term = termAtt.toString();
 
        // Check the exclusion table.
        if (!keywordAttr.isKeyword() && (exclusionSet == null || !exclusionSet.contains(term))) {
          String s = stemmer.stem(term);
          // If not stemmed, don't waste the time adjusting the token.
          if ((s != null) && !s.equals(term))
-           termAtt.setTermBuffer(s);
+           termAtt.setEmpty().append(s);
        }
        return true;
      } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link HindiNormalizer} to normalize the
@@ -39,7 +39,7 @@
 public final class HindiNormalizationFilter extends TokenFilter {
 
   private final HindiNormalizer normalizer = new HindiNormalizer();
-  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
 
   public HindiNormalizationFilter(TokenStream input) {
@@ -50,8 +50,8 @@
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
       if (!keywordAtt.isKeyword())
-        termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(),
-            termAtt.termLength()));
+        termAtt.setLength(normalizer.normalize(termAtt.buffer(),
+            termAtt.length()));
       return true;
     }
     return false;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java (working copy)
@@ -22,13 +22,13 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link HindiStemmer} to stem Hindi words.
 */
 public final class HindiStemFilter extends TokenFilter {
-  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
   private final HindiStemmer stemmer = new HindiStemmer();
 
@@ -40,7 +40,7 @@
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
       if (!keywordAtt.isKeyword())
-        termAtt.setTermLength(stemmer.stem(termAtt.termBuffer(), termAtt.termLength()));
+        termAtt.setLength(stemmer.stem(termAtt.buffer(), termAtt.length()));
       return true;
     } else {
       return false;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.util.Version;
 
@@ -34,7 +34,7 @@
 public final class StemmerOverrideFilter extends TokenFilter {
   private final CharArrayMap dictionary;
 
-  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
 
   /**
@@ -56,9 +56,9 @@
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
       if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
-        String stem = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength());
+        String stem = dictionary.get(termAtt.buffer(), 0, termAtt.length());
         if (stem != null) {
-          termAtt.setTermBuffer(stem);
+          termAtt.setEmpty().append(stem);
          keywordAtt.setKeyword(true);
         }
       }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.index.Payload;
 
@@ -44,14 +44,14 @@
   private TokenStream prefix;
   private TokenStream suffix;
 
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
   private PositionIncrementAttribute posIncrAtt;
   private PayloadAttribute payloadAtt;
   private OffsetAttribute offsetAtt;
   private TypeAttribute typeAtt;
   private FlagsAttribute flagsAtt;
 
-  private TermAttribute p_termAtt;
+  private CharTermAttribute p_termAtt;
   private PositionIncrementAttribute p_posIncrAtt;
   private PayloadAttribute p_payloadAtt;
   private OffsetAttribute p_offsetAtt;
@@ -64,14 +64,14 @@
     this.prefix = prefix;
     prefixExhausted = false;
 
-    termAtt = addAttribute(TermAttribute.class);
+    termAtt = addAttribute(CharTermAttribute.class);
     posIncrAtt = addAttribute(PositionIncrementAttribute.class);
     payloadAtt = addAttribute(PayloadAttribute.class);
     offsetAtt = addAttribute(OffsetAttribute.class);
     typeAtt = addAttribute(TypeAttribute.class);
     flagsAtt = addAttribute(FlagsAttribute.class);
 
-    p_termAtt = prefix.addAttribute(TermAttribute.class);
+    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
     p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
     p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
     p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
@@ -115,7 +115,7 @@
   private void setCurrentToken(Token token) {
     if (token == null) return;
     clearAttributes();
-    termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+    termAtt.copyBuffer(token.buffer(), 0, token.length());
     posIncrAtt.setPositionIncrement(token.getPositionIncrement());
     flagsAtt.setFlags(token.getFlags());
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -125,7 +125,7 @@
   private Token getNextPrefixInputToken(Token token) throws IOException {
     if (!prefix.incrementToken()) return null;
-    token.setTermBuffer(p_termAtt.termBuffer(), 0, p_termAtt.termLength());
+    token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
     token.setPositionIncrement(p_posIncrAtt.getPositionIncrement());
     token.setFlags(p_flagsAtt.getFlags());
     token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
@@ -136,7 +136,7 @@
   private Token getNextSuffixInputToken(Token token) throws IOException {
     if (!suffix.incrementToken()) return null;
-    token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
     token.setPositionIncrement(posIncrAtt.getPositionIncrement());
     token.setFlags(flagsAtt.getFlags());
     token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (working copy)
@@ -24,7 +24,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link BrazilianStemmer}.
@@ -41,10 +41,10 @@
   /**
    * {@link BrazilianStemmer} in use by this filter.
   */
-  private BrazilianStemmer stemmer = null;
+  private BrazilianStemmer stemmer = new BrazilianStemmer();
   private Set exclusions = null;
-  private final TermAttribute termAtt;
-  private final KeywordAttribute keywordAttr;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   /**
    * Creates a new BrazilianStemFilter
   *
@@ -53,9 +53,6 @@
   */
   public BrazilianStemFilter(TokenStream in) {
     super(in);
-    stemmer = new BrazilianStemmer();
-    termAtt = addAttribute(TermAttribute.class);
-    keywordAttr = addAttribute(KeywordAttribute.class);
  }
 
   /**
   *
@@ -74,13 +71,13 @@
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      final String term = termAtt.term();
+      final String term = termAtt.toString();
       // Check the exclusion table.
       if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
         final String s = stemmer.stem(term);
         // If not stemmed, don't waste the time adjusting the token.
         if ((s != null) && !s.equals(term))
-          termAtt.setTermBuffer(s);
+          termAtt.setEmpty().append(s);
       }
       return true;
     } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (working copy)
@@ -20,7 +20,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 import java.io.IOException;
 
@@ -72,8 +72,8 @@
   private int curGramSize;
   private int tokStart;
 
-  private final TermAttribute termAtt;
-  private final OffsetAttribute offsetAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   /**
    * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -101,8 +101,6 @@
     this.minGram = minGram;
     this.maxGram = maxGram;
     this.side = side;
-    this.termAtt = addAttribute(TermAttribute.class);
-    this.offsetAtt = addAttribute(OffsetAttribute.class);
   }
 
   /**
@@ -124,8 +122,8 @@
         if (!input.incrementToken()) {
           return false;
         } else {
-          curTermBuffer = termAtt.termBuffer().clone();
-          curTermLength = termAtt.termLength();
+          curTermBuffer = termAtt.buffer().clone();
+          curTermLength = termAtt.length();
           curGramSize = minGram;
           tokStart = offsetAtt.startOffset();
         }
@@ -138,7 +136,7 @@
           int end = start + curGramSize;
           clearAttributes();
           offsetAtt.setOffset(tokStart + start, tokStart + end);
-          termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
+          termAtt.copyBuffer(curTermBuffer, start, curGramSize);
           curGramSize++;
           return true;
         }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (working copy)
@@ -22,7 +22,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * Tokenizes the input into n-grams of the given size(s).
@@ -39,8 +39,8 @@
   private int curPos;
   private int tokStart;
 
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   /**
    * Creates NGramTokenFilter with given min and max n-grams.
@@ -58,9 +58,6 @@
     }
     this.minGram = minGram;
     this.maxGram = maxGram;
-
-    this.termAtt = addAttribute(TermAttribute.class);
-    this.offsetAtt = addAttribute(OffsetAttribute.class);
   }
 
   /**
@@ -79,8 +76,8 @@
       if (!input.incrementToken()) {
         return false;
       } else {
-        curTermBuffer = termAtt.termBuffer().clone();
-        curTermLength = termAtt.termLength();
+        curTermBuffer = termAtt.buffer().clone();
+        curTermLength = termAtt.length();
         curGramSize = minGram;
         curPos = 0;
         tokStart = offsetAtt.startOffset();
       }
@@ -89,7 +86,7 @@
     while (curGramSize <= maxGram) {
       while (curPos+curGramSize <= curTermLength) {     // while there is input
         clearAttributes();
-        termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
+        termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
         offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
         curPos++;
         return true;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (working copy)
@@ -22,7 +22,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 import java.io.IOException;
 import java.util.HashSet;
@@ -51,17 +51,14 @@
   /**
    * The actual token in the input stream.
    */
-  private FrenchStemmer stemmer = null;
+  private FrenchStemmer stemmer = new FrenchStemmer();
   private Set exclusions = null;
 
-  private final TermAttribute termAtt;
-  private final KeywordAttribute keywordAttr;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   public FrenchStemFilter( TokenStream in ) {
-	  super(in);
-	  stemmer = new FrenchStemmer();
-	  termAtt = addAttribute(TermAttribute.class);
-	  keywordAttr = addAttribute(KeywordAttribute.class);
+    super(in);
   }
 
   /**
  *
@@ -82,14 +79,14 @@
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      String term = termAtt.term();
+      String term = termAtt.toString();
 
       // Check the exclusion table
       if ( !keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains( term )) ) {
         String s = stemmer.stem( term );
         // If not stemmed, don't waste the time adjusting the token.
         if ((s != null) && !s.equals( term ) )
-          termAtt.setTermBuffer(s);
+          termAtt.setEmpty().append(s);
       }
       return true;
     } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
 
@@ -37,7 +37,7 @@
 */
 public final class ElisionFilter extends TokenFilter {
   private CharArraySet articles = CharArraySet.EMPTY_SET;
-  private final TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
       new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
          "l", "m", "t", "qu", "n", "s", "j"), true));
@@ -100,7 +100,6 @@
     super(input);
     this.articles = CharArraySet.unmodifiableSet(
         new CharArraySet(matchVersion, articles, true));
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   /**
@@ -120,8 +119,8 @@
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      char[] termBuffer = termAtt.termBuffer();
-      int termLength = termAtt.termLength();
+      char[] termBuffer = termAtt.buffer();
+      int termLength = termAtt.length();
 
       int minPoz = Integer.MAX_VALUE;
       for (int i = 0; i < apostrophes.length; i++) {
@@ -137,8 +136,8 @@
 
       // An apostrophe has been found. If the prefix is an article strip it off.
       if (minPoz != Integer.MAX_VALUE
-          && articles.contains(termAtt.termBuffer(), 0, minPoz)) {
-        termAtt.setTermBuffer(termAtt.termBuffer(), minPoz + 1, termAtt.termLength() - (minPoz + 1));
+          && articles.contains(termAtt.buffer(), 0, minPoz)) {
+        termAtt.copyBuffer(termAtt.buffer(), minPoz + 1, termAtt.length() - (minPoz + 1));
       }
 
       return true;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (working copy)
@@ -28,7 +28,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that stems Dutch words.
@@ -52,17 +52,14 @@
   /**
    * The actual token in the input stream.
   */
-  private DutchStemmer stemmer = null;
+  private DutchStemmer stemmer = new DutchStemmer();
   private Set exclusions = null;
 
-  private final TermAttribute termAtt;
-  private final KeywordAttribute keywordAttr;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   public DutchStemFilter(TokenStream _in) {
     super(_in);
-    stemmer = new DutchStemmer();
-    termAtt = addAttribute(TermAttribute.class);
-    keywordAttr = addAttribute(KeywordAttribute.class);
   }
 
   /**
@@ -99,14 +96,14 @@
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      final String term = termAtt.term();
+      final String term = termAtt.toString();
 
       // Check the exclusion table.
       if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
        final String s = stemmer.stem(term);
         // If not stemmed, don't waste the time adjusting the token.
         if ((s != null) && !s.equals(term))
-          termAtt.setTermBuffer(s);
+          termAtt.setEmpty().append(s);
       }
       return true;
     } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (working copy)
@@ -19,7 +19,7 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Version;
 
 import java.io.IOException;
@@ -42,7 +42,7 @@
  */
 public final class ReverseStringFilter extends TokenFilter {
 
-  private TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final char marker;
   private final Version matchVersion;
   private static final char NOMARKER = '\uFFFF';
@@ -131,20 +131,19 @@
     super(in);
     this.matchVersion = matchVersion;
    this.marker = marker;
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      int len = termAtt.termLength();
+      int len = termAtt.length();
       if (marker != NOMARKER) {
         len++;
-        termAtt.resizeTermBuffer(len);
-        termAtt.termBuffer()[len - 1] = marker;
+        termAtt.resizeBuffer(len);
+        termAtt.buffer()[len - 1] = marker;
       }
-      reverse( matchVersion, termAtt.termBuffer(), 0, len );
-      termAtt.setTermLength(len);
+      reverse( matchVersion, termAtt.buffer(), 0, len );
+      termAtt.setLength(len);
       return true;
     } else {
       return false;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (working copy)
@@ -21,7 +21,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 
 /**
@@ -39,15 +39,13 @@
 public final class DelimitedPayloadTokenFilter extends TokenFilter {
   public static final char DEFAULT_DELIMITER = '|';
   private final char delimiter;
-  private final TermAttribute termAtt;
-  private final PayloadAttribute payAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class);
   private final PayloadEncoder encoder;
 
 
   public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
     super(input);
-    termAtt = addAttribute(TermAttribute.class);
-    payAtt = addAttribute(PayloadAttribute.class);
     this.delimiter = delimiter;
     this.encoder = encoder;
   }
 
@@ -55,12 +53,12 @@
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      final char[] buffer = termAtt.termBuffer();
-      final int length = termAtt.termLength();
+      final char[] buffer = termAtt.buffer();
+      final int length = termAtt.length();
       for (int i = 0; i < length; i++) {
         if (buffer[i] == delimiter) {
           payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
-          termAtt.setTermLength(i); // simply set a new length
+          termAtt.setLength(i); // simply set a new length
           return true;
         }
       }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (working copy)
@@ -35,16 +35,14 @@
   private String typeMatch;
   private Payload thePayload;
 
-  private PayloadAttribute payloadAtt;
-  private TypeAttribute typeAtt;
+  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
     super(input);
     //Need to encode the payload
     thePayload = new Payload(PayloadHelper.encodeFloat(payload));
     this.typeMatch = typeMatch;
-    payloadAtt = addAttribute(PayloadAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
 
   @Override
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (working copy)
@@ -33,13 +33,11 @@
  *
  **/
 public class TypeAsPayloadTokenFilter extends TokenFilter {
-  private PayloadAttribute payloadAtt;
-  private TypeAttribute typeAtt;
+  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   public TypeAsPayloadTokenFilter(TokenStream input) {
     super(input);
-    payloadAtt = addAttribute(PayloadAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
 
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (working copy)
@@ -33,13 +33,11 @@
  *
  **/
 public class TokenOffsetPayloadTokenFilter extends TokenFilter {
-  protected OffsetAttribute offsetAtt;
-  protected PayloadAttribute payAtt;
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class);
 
   public TokenOffsetPayloadTokenFilter(TokenStream input) {
     super(input);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    payAtt = addAttribute(PayloadAttribute.class);
   }
 
   @Override
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
 import org.tartarus.snowball.SnowballProgram;
 
@@ -42,7 +42,7 @@
 
   private final SnowballProgram stemmer;
 
-  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
@@ -75,16 +75,16 @@
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
       if (!keywordAttr.isKeyword()) {
-        char termBuffer[] = termAtt.termBuffer();
-        final int length = termAtt.termLength();
+        char termBuffer[] = termAtt.buffer();
+        final int length = termAtt.length();
         stemmer.setCurrent(termBuffer, length);
         stemmer.stem();
         final char finalTerm[] = stemmer.getCurrentBuffer();
         final int newLength = stemmer.getCurrentBufferLength();
         if (finalTerm != termBuffer)
-          termAtt.setTermBuffer(finalTerm, 0, newLength);
+          termAtt.copyBuffer(finalTerm, 0, newLength);
         else
-          termAtt.setTermLength(newLength);
+          termAtt.setLength(newLength);
       }
       return true;
     } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java (working copy)
@@ -21,7 +21,7 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * Normalizes Turkish token text to lower case.
@@ -37,7 +37,7 @@
   private static final int LATIN_SMALL_LETTER_I = '\u0069';
   private static final int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
   private static final int COMBINING_DOT_ABOVE = '\u0307';
-  private final TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /**
    * Create a new TurkishLowerCaseFilter, that normalizes Turkish token text
   *
@@ -47,7 +47,6 @@
   */
   public TurkishLowerCaseFilter(TokenStream in) {
     super(in);
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
@@ -55,8 +54,8 @@
     boolean iOrAfter = false;
 
     if (input.incrementToken()) {
-      final char[] buffer = termAtt.termBuffer();
-      int length = termAtt.termLength();
+      final char[] buffer = termAtt.buffer();
+      int length = termAtt.length();
       for (int i = 0; i < length;) {
         final int ch = Character.codePointAt(buffer, i);
@@ -88,7 +87,7 @@
         i += Character.toChars(Character.toLowerCase(ch), buffer, i);
       }
 
-      termAtt.setTermLength(length);
+      termAtt.setLength(length);
       return true;
     } else
       return false;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (working copy)
@@ -22,7 +22,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * Normalizes token text to lower case.
@@ -32,20 +32,19 @@
 @Deprecated
 public final class RussianLowerCaseFilter extends TokenFilter
 {
-    private TermAttribute termAtt;
+    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
     public RussianLowerCaseFilter(TokenStream in)
     {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
     }
 
     @Override
     public final boolean incrementToken() throws IOException
     {
      if (input.incrementToken()) {
-       char[] chArray = termAtt.termBuffer();
-       int chLen = termAtt.termLength();
+       char[] chArray = termAtt.buffer();
+       int chLen = termAtt.length();
        for (int i = 0; i < chLen; i++)
        {
          chArray[i] = Character.toLowerCase(chArray[i]);
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (working copy)
@@ -22,7 +22,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.ru.RussianStemmer;//javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link
 
@@ -51,17 +51,14 @@
     /**
      * The actual token in the input stream.
     */
-    private RussianStemmer stemmer = null;
+    private RussianStemmer stemmer = new RussianStemmer();
 
-    private final TermAttribute termAtt;
-    private final KeywordAttribute keywordAttr;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
     public RussianStemFilter(TokenStream in)
     {
       super(in);
-      stemmer = new RussianStemmer();
-      termAtt = addAttribute(TermAttribute.class);
-      keywordAttr = addAttribute(KeywordAttribute.class);
     }
     /**
      * Returns the next token in the stream, or null at EOS
     */
@@ -71,10 +68,10 @@
     {
       if (input.incrementToken()) {
         if(!keywordAttr.isKeyword()) {
-          final String term = termAtt.term();
+          final String term = termAtt.toString();
           final String s = stemmer.stem(term);
           if (s != null && !s.equals(term))
-            termAtt.setTermBuffer(s);
+            termAtt.setEmpty().append(s);
         }
         return true;
       } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (working copy)
@@ -137,10 +137,10 @@
    */
   private boolean isOutputHere = false;
 
-  private final CharTermAttribute termAtt;
-  private final OffsetAttribute offsetAtt;
-  private final PositionIncrementAttribute posIncrAtt;
-  private final TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
 
   /**
@@ -155,10 +155,6 @@
     super(input);
     setMaxShingleSize(maxShingleSize);
     setMinShingleSize(minShingleSize);
-    this.termAtt = addAttribute(CharTermAttribute.class);
-    this.offsetAtt = addAttribute(OffsetAttribute.class);
-    this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-    this.typeAtt = addAttribute(TypeAttribute.class);
   }
 
   /**
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
 
@@ -61,21 +61,20 @@
 
     private CharArraySet stopTable;
 
-    private TermAttribute termAtt;
+    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
     public ChineseFilter(TokenStream in) {
         super(in);
 
         stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false);
-        termAtt = addAttribute(TermAttribute.class);
     }
 
     @Override
    public boolean incrementToken() throws IOException {
 
        while (input.incrementToken()) {
-            char text[] = termAtt.termBuffer();
-            int termLength = termAtt.termLength();
+            char text[] = termAtt.buffer();
+            int termLength = termAtt.length();
 
          // why not key off token type here assuming ChineseTokenizer comes first?
            if (!stopTable.contains(text, 0, termLength)) {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (working copy)
@@ -23,8 +23,8 @@
 
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.AttributeSource;
 
 
@@ -62,24 +62,16 @@
 
     public ChineseTokenizer(Reader in) {
       super(in);
-      init();
     }
 
     public ChineseTokenizer(AttributeSource source, Reader in) {
       super(source, in);
-      init();
     }
 
     public ChineseTokenizer(AttributeFactory factory, Reader in) {
       super(factory, in);
-      init();
     }
-
-    private void init() {
-      termAtt = addAttribute(TermAttribute.class);
-      offsetAtt = addAttribute(OffsetAttribute.class);
-    }
-
+    
     private int offset = 0, bufferIndex=0, dataLen=0;
     private final static int MAX_WORD_LEN = 255;
     private final static int IO_BUFFER_SIZE = 1024;
@@ -90,8 +82,8 @@
     private int length;
     private int start;
 
-    private TermAttribute termAtt;
-    private OffsetAttribute offsetAtt;
+    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
     private final void push(char c) {
 
@@ -105,7 +97,7 @@
       if (length>0) {
         //System.out.println(new String(buffer, 0,
         //length));
-        termAtt.setTermBuffer(buffer, 0, length);
+        termAtt.copyBuffer(buffer, 0, length);
         offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
         return true;
       }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words..
@@ -35,24 +35,20 @@
  * @see KeywordMarkerFilter
 */
 public final class ArabicStemFilter extends TokenFilter {
-
-  private final ArabicStemmer stemmer;
-  private final TermAttribute termAtt;
-  private final KeywordAttribute keywordAttr;
+  private final ArabicStemmer stemmer = new ArabicStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   public ArabicStemFilter(TokenStream input) {
     super(input);
-    stemmer = new ArabicStemmer();
-    termAtt = addAttribute(TermAttribute.class);
-    keywordAttr = addAttribute(KeywordAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
      if(!keywordAttr.isKeyword()) {
-        final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength());
-        termAtt.setTermLength(newlen);
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
      }
       return true;
     } else {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java (working copy)
@@ -21,7 +21,7 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography.
@@ -29,21 +29,18 @@
  */
 public final class ArabicNormalizationFilter extends TokenFilter {
-
-  private final ArabicNormalizer normalizer;
-  private final TermAttribute termAtt;
+  private final ArabicNormalizer normalizer = new ArabicNormalizer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   public ArabicNormalizationFilter(TokenStream input) {
     super(input);
-    normalizer = new ArabicNormalizer();
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt.termLength());
-      termAtt.setTermLength(newlen);
+      int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+      termAtt.setLength(newlen);
       return true;
     }
     return false;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (working copy)
@@ -35,7 +35,7 @@
   /** The first token must have non-zero positionIncrement **/
   private boolean firstTokenPositioned = false;
 
-  private PositionIncrementAttribute posIncrAtt;
+  private PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 
   /**
    * Constructs a PositionFilter that assigns a position increment of zero to
@@ -45,7 +45,6 @@
   */
   public PositionFilter(final TokenStream input) {
     super(input);
-    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   }
 
   /**
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java (working copy)
@@ -21,14 +21,14 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * A {@link TokenFilter} that applies {@link IndicNormalizer} to normalize text
 * in Indian Languages.
 */
 public final class IndicNormalizationFilter extends TokenFilter {
-  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final IndicNormalizer normalizer = new IndicNormalizer();
 
   public IndicNormalizationFilter(TokenStream input) {
@@ -38,7 +38,7 @@
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(), termAtt.termLength()));
+      termAtt.setLength(normalizer.normalize(termAtt.buffer(), termAtt.length()));
       return true;
     } else {
       return false;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (working copy)
@@ -22,7 +22,7 @@
 
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
 
@@ -98,9 +98,9 @@
      */
     private boolean preIsTokened = false;
 
-    private TermAttribute termAtt;
-    private OffsetAttribute offsetAtt;
-    private TypeAttribute typeAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
     //~ Constructors -----------------------------------------------------------
 
@@ -111,25 +111,16 @@
      */
     public CJKTokenizer(Reader in) {
       super(in);
-      init();
     }
 
    public CJKTokenizer(AttributeSource source, Reader in) {
      super(source, in);
-      init();
    }
 
    public CJKTokenizer(AttributeFactory factory, Reader in) {
      super(factory, in);
-      init();
    }
-
-    private void init() {
-      termAtt = addAttribute(TermAttribute.class);
-      offsetAtt = addAttribute(OffsetAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
-    }
-
     //~ Methods ----------------------------------------------------------------
 
     /**
@@ -287,7 +278,7 @@
         }
 
         if (length > 0) {
-          termAtt.setTermBuffer(buffer, 0, length);
+          termAtt.copyBuffer(buffer, 0, length);
          offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
          typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]);
          return true;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (revision 948225)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (working copy)
@@ -6,7 +6,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -37,23 +37,20 @@
  * @see KeywordMarkerFilter
 */
 public final class CzechStemFilter extends TokenFilter {
-  private final CzechStemmer stemmer;
-  private final TermAttribute termAtt;
-  private final KeywordAttribute keywordAttr;
+  private final CzechStemmer stemmer = new CzechStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   public CzechStemFilter(TokenStream input) {
     super(input);
-    stemmer = new CzechStemmer();
-    termAtt = addAttribute(TermAttribute.class);
-    keywordAttr = addAttribute(KeywordAttribute.class);
  }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
      if(!keywordAttr.isKeyword()) {
-        final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength());
-        termAtt.setTermLength(newlen);
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
      }
       return true;
     } else {
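For reference only (not part of the patch): the same CharTermAttribute idioms used throughout the diff above, collected into one minimal, hypothetical filter. The class name is made up for illustration; the attribute calls (buffer(), length(), setLength(), and field-initializer registration via addAttribute) are the ones this patch switches to in place of the deprecated TermAttribute methods.

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class ExampleLowerCaseFilter extends TokenFilter {
  // attributes are registered in the field initializer, replacing the old init() pattern
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public ExampleLowerCaseFilter(TokenStream in) {
    super(in);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    final char[] buffer = termAtt.buffer();   // was termAtt.termBuffer()
    final int length = termAtt.length();      // was termAtt.termLength()
    for (int i = 0; i < length; i++) {
      // naive per-char lowercasing, purely to show in-place buffer edits
      buffer[i] = Character.toLowerCase(buffer[i]);
    }
    termAtt.setLength(length);                // was termAtt.setTermLength(length)
    return true;
  }
}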