Index: backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java
===================================================================
--- backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java (revision 932398)
+++ backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java (working copy)
@@ -120,6 +120,7 @@
String[] y = StandardTokenizer.TOKEN_TYPES;
}
+ /* StandardAnalyzer was made final in 3.1:
private static class MyStandardAnalyzer extends StandardAnalyzer {
public MyStandardAnalyzer() {
super(org.apache.lucene.util.Version.LUCENE_CURRENT);
@@ -139,6 +140,7 @@
assertTrue(ts.incrementToken());
assertFalse(ts.incrementToken());
}
+ */
}
class PayloadSetter extends TokenFilter {
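Note: every change below follows the same two-part pattern: the deprecated TermAttribute is swapped for CharTermAttribute (termBuffer()/termLength()/term()/setTermBuffer() become buffer()/length()/toString()/copyBuffer()), and the addAttribute() call moves out of the constructor into the field initializer. A minimal sketch of the resulting filter shape — the PassThroughFilter class itself is hypothetical, not part of this patch:

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class PassThroughFilter extends TokenFilter {
      // new style: register the attribute where the field is declared;
      // no constructor boilerplate is needed
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

      PassThroughFilter(TokenStream in) {
        super(in);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) return false;
        // read the term text directly from the attribute's buffer
        final char[] buffer = termAtt.buffer();
        final int length = termAtt.length();
        // inspect or rewrite buffer[0..length) here, then return the token
        return true;
      }
    }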
Index: src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (working copy)
@@ -19,7 +19,7 @@
import java.io.IOException;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@@ -61,18 +61,17 @@
public ASCIIFoldingFilter(TokenStream input)
{
super(input);
- termAtt = addAttribute(TermAttribute.class);
}
private char[] output = new char[512];
private int outputPos;
- private TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final char[] buffer = termAtt.termBuffer();
- final int length = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ final int length = termAtt.length();
// If no characters actually require rewriting then we
// just return token as-is:
@@ -81,7 +80,7 @@
if (c >= '\u0080')
{
foldToASCII(buffer, length);
- termAtt.setTermBuffer(output, 0, outputPos);
+ termAtt.copyBuffer(output, 0, outputPos);
break;
}
}
Index: src/java/org/apache/lucene/analysis/CharTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/CharTokenizer.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/CharTokenizer.java (working copy)
@@ -21,7 +21,7 @@
import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CharacterUtils;
import org.apache.lucene.util.Version;
@@ -78,10 +78,7 @@
public CharTokenizer(Version matchVersion, Reader input) {
super(input);
charUtils = CharacterUtils.getInstance(matchVersion);
- offsetAtt = addAttribute(OffsetAttribute.class);
- termAtt = addAttribute(TermAttribute.class);
useOldAPI = useOldAPI(matchVersion);
- ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
}
@@ -99,10 +96,7 @@
Reader input) {
super(source, input);
charUtils = CharacterUtils.getInstance(matchVersion);
- offsetAtt = addAttribute(OffsetAttribute.class);
- termAtt = addAttribute(TermAttribute.class);
useOldAPI = useOldAPI(matchVersion);
- ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
}
/**
@@ -119,10 +113,7 @@
Reader input) {
super(factory, input);
charUtils = CharacterUtils.getInstance(matchVersion);
- offsetAtt = addAttribute(OffsetAttribute.class);
- termAtt = addAttribute(TermAttribute.class);
useOldAPI = useOldAPI(matchVersion);
- ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
}
/**
@@ -164,11 +155,11 @@
private static final int MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
- private final TermAttribute termAtt;
- private final OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final CharacterUtils charUtils;
- private final CharacterBuffer ioBuffer;
+ private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
/**
* @deprecated this will be removed in lucene 4.0
@@ -275,7 +266,7 @@
return incrementTokenOld();
int length = 0;
int start = bufferIndex;
- char[] buffer = termAtt.termBuffer();
+ char[] buffer = termAtt.buffer();
while (true) {
if (bufferIndex >= dataLen) {
offset += dataLen;
@@ -297,7 +288,7 @@
if (length == 0) // start of token
start = offset + bufferIndex - 1;
else if (length >= buffer.length-1) // check if a supplementary could run out of bounds
- buffer = termAtt.resizeTermBuffer(2+length); // make sure a supplementary fits in the buffer
+ buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test
break;
@@ -305,7 +296,7 @@
break; // return 'em
}
- termAtt.setTermLength(length);
+ termAtt.setLength(length);
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
@@ -320,7 +311,7 @@
private boolean incrementTokenOld() throws IOException {
int length = 0;
int start = bufferIndex;
- char[] buffer = termAtt.termBuffer();
+ char[] buffer = termAtt.buffer();
final char[] oldIoBuffer = ioBuffer.getBuffer();
while (true) {
@@ -344,7 +335,7 @@
if (length == 0) // start of token
start = offset + bufferIndex - 1;
else if (length == buffer.length)
- buffer = termAtt.resizeTermBuffer(1+length);
+ buffer = termAtt.resizeBuffer(1+length);
buffer[length++] = normalize(c); // buffer it, normalized
@@ -355,7 +346,7 @@
break; // return 'em
}
- termAtt.setTermLength(length);
+ termAtt.setLength(length);
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
}
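The resizeBuffer(2+length) call above is needed because a single supplementary code point expands to a surrogate pair when written with Character.toChars(). A standalone illustration of that behavior (plain JDK, no Lucene types):

    public class SurrogateDemo {
      public static void main(String[] args) {
        // U+10400 lies outside the BMP, so Character.toChars() writes
        // two chars (a high/low surrogate pair) into the buffer:
        char[] buf = new char[2];
        int written = Character.toChars(0x10400, buf, 0);
        System.out.println(written);                           // 2
        System.out.println(Character.isHighSurrogate(buf[0])); // true
        System.out.println(Character.isLowSurrogate(buf[1]));  // true
      }
    }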
Index: src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java (working copy)
@@ -1,7 +1,5 @@
package org.apache.lucene.analysis;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,6 +17,8 @@
* limitations under the License.
*/
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
/**
* A filter that replaces accented characters in the ISO Latin 1 character set
* (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
@@ -35,25 +35,24 @@
public final class ISOLatin1AccentFilter extends TokenFilter {
public ISOLatin1AccentFilter(TokenStream input) {
super(input);
- termAtt = addAttribute(TermAttribute.class);
}
private char[] output = new char[256];
private int outputPos;
- private TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@Override
public final boolean incrementToken() throws java.io.IOException {
if (input.incrementToken()) {
- final char[] buffer = termAtt.termBuffer();
- final int length = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ final int length = termAtt.length();
// If no characters actually require rewriting then we
// just return token as-is:
      for(int i=0;i<length;i++) {
        final char c = buffer[i];
        if (c >= '\u00c0' && c <= '\uFB06') {
removeAccents(buffer, length);
- termAtt.setTermBuffer(output, 0, outputPos);
+ termAtt.copyBuffer(output, 0, outputPos);
break;
}
}
Index: src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java (working copy)
@@ -21,7 +21,7 @@
import java.util.Set;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
/**
@@ -33,8 +33,8 @@
*/
public final class KeywordMarkerTokenFilter extends TokenFilter {
- private final KeywordAttribute keywordAttr;
- private final TermAttribute termAtt;
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final CharArraySet keywordSet;
/**
@@ -50,8 +50,6 @@
public KeywordMarkerTokenFilter(final TokenStream in,
final CharArraySet keywordSet) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
this.keywordSet = keywordSet;
}
@@ -73,8 +71,8 @@
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- keywordAttr.setKeyword(keywordSet.contains(termAtt.termBuffer(), 0,
- termAtt.termLength()));
+ keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
+ termAtt.length()));
return true;
} else
return false;
Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/KeywordTokenizer.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java (working copy)
@@ -21,7 +21,7 @@
import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
/**
@@ -31,10 +31,10 @@
private static final int DEFAULT_BUFFER_SIZE = 256;
- private boolean done;
+ private boolean done = false;
private int finalOffset;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public KeywordTokenizer(Reader input) {
this(input, DEFAULT_BUFFER_SIZE);
@@ -42,41 +42,34 @@
public KeywordTokenizer(Reader input, int bufferSize) {
super(input);
- init(bufferSize);
+ termAtt.resizeBuffer(bufferSize);
}
public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) {
super(source, input);
- init(bufferSize);
+ termAtt.resizeBuffer(bufferSize);
}
public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
super(factory, input);
- init(bufferSize);
+ termAtt.resizeBuffer(bufferSize);
}
- private void init(int bufferSize) {
- this.done = false;
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- termAtt.resizeTermBuffer(bufferSize);
- }
-
@Override
public final boolean incrementToken() throws IOException {
if (!done) {
clearAttributes();
done = true;
int upto = 0;
- char[] buffer = termAtt.termBuffer();
+ char[] buffer = termAtt.buffer();
while (true) {
final int length = input.read(buffer, upto, buffer.length-upto);
if (length == -1) break;
upto += length;
if (upto == buffer.length)
- buffer = termAtt.resizeTermBuffer(1+buffer.length);
+ buffer = termAtt.resizeBuffer(1+buffer.length);
}
- termAtt.setTermLength(upto);
+ termAtt.setLength(upto);
finalOffset = correctOffset(upto);
offsetAtt.setOffset(correctOffset(0), finalOffset);
return true;
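KeywordTokenizer's read loop relies on the CharTermAttribute buffer contract: resizeBuffer() preserves existing content and returns the (possibly reallocated) array, and setLength() then declares how many chars are valid. A standalone sketch of that contract, assuming CharTermAttributeImpl's public no-arg constructor:

    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;

    public class BufferContractDemo {
      public static void main(String[] args) {
        CharTermAttribute t = new CharTermAttributeImpl();
        t.setEmpty().append("abc");
        char[] buf = t.resizeBuffer(1024);   // grows; "abc" is preserved
        buf[3] = 'd';                        // write directly into the buffer
        t.setLength(4);                      // term text is now "abcd"
        System.out.println(t.toString());    // prints: abcd
      }
    }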
Index: src/java/org/apache/lucene/analysis/LengthFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/LengthFilter.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/LengthFilter.java (working copy)
@@ -19,17 +19,17 @@
import java.io.IOException;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Removes words that are too long or too short from the stream.
*/
public final class LengthFilter extends TokenFilter {
- final int min;
- final int max;
+ private final int min;
+ private final int max;
- private TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Build a filter that removes words that are too long or too
@@ -40,7 +40,6 @@
super(in);
this.min = min;
this.max = max;
- termAtt = addAttribute(TermAttribute.class);
}
/**
@@ -50,7 +49,7 @@
public final boolean incrementToken() throws IOException {
// return the first non-stop word found
while (input.incrementToken()) {
- int len = termAtt.termLength();
+ int len = termAtt.length();
if (len >= min && len <= max) {
return true;
}
Index: src/java/org/apache/lucene/analysis/LowerCaseFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/LowerCaseFilter.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/LowerCaseFilter.java (working copy)
@@ -19,7 +19,7 @@
import java.io.IOException;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.CharacterUtils;
import org.apache.lucene.util.Version;
@@ -34,7 +34,8 @@
*/
public final class LowerCaseFilter extends TokenFilter {
private final CharacterUtils charUtils;
-
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
/**
* Create a new LowerCaseFilter, that normalizes token text to lower case.
*
@@ -43,7 +44,6 @@
*/
public LowerCaseFilter(Version matchVersion, TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
charUtils = CharacterUtils.getInstance(matchVersion);
}
@@ -55,13 +55,11 @@
this(Version.LUCENE_30, in);
}
- private TermAttribute termAtt;
-
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final char[] buffer = termAtt.termBuffer();
- final int length = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ final int length = termAtt.length();
for (int i = 0; i < length;) {
i += Character.toChars(
Character.toLowerCase(
Index: src/java/org/apache/lucene/analysis/NumericTokenStream.java
===================================================================
--- src/java/org/apache/lucene/analysis/NumericTokenStream.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/NumericTokenStream.java (working copy)
@@ -26,7 +26,6 @@
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -118,11 +117,14 @@
this.delegate = delegate;
}
- @Override
+ @Override @SuppressWarnings("deprecation")
 public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
if (attClass == NumericTermAttribute.class)
return new NumericTermAttributeImpl(ts);
- if (attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class))
+ if (attClass.isAssignableFrom(CharTermAttribute.class) ||
+ // TODO: remove in 4.0 (deprecated class, also remove the suppress above):
+ attClass.isAssignableFrom(org.apache.lucene.analysis.tokenattributes.TermAttribute.class)
+ )
throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute.");
return delegate.createAttributeInstance(attClass);
}
Index: src/java/org/apache/lucene/analysis/PorterStemFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/PorterStemFilter.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/PorterStemFilter.java (working copy)
@@ -20,7 +20,7 @@
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/** Transforms the token stream as per the Porter stemming algorithm.
Note: the input to the stemming filter must already be in lower case,
@@ -47,15 +47,12 @@
*/
public final class PorterStemFilter extends TokenFilter {
- private final PorterStemmer stemmer;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final PorterStemmer stemmer = new PorterStemmer();
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public PorterStemFilter(TokenStream in) {
super(in);
- stemmer = new PorterStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
@Override
@@ -63,8 +60,8 @@
if (!input.incrementToken())
return false;
- if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.termBuffer(), 0, termAtt.termLength()))
- termAtt.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
+ if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.buffer(), 0, termAtt.length()))
+ termAtt.copyBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
return true;
}
}
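PorterStemFilter now consults KeywordAttribute before stemming, so an upstream KeywordMarkerTokenFilter can shield selected tokens. A minimal chain wiring the two together — the sample text and keyword set are invented for illustration:

    import java.io.StringReader;

    import org.apache.lucene.analysis.CharArraySet;
    import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
    import org.apache.lucene.analysis.PorterStemFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class KeywordStemDemo {
      public static void main(String[] args) throws Exception {
        CharArraySet keywords = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
        keywords.add("jumping"); // marked as keyword, protected from stemming
        TokenStream ts = new PorterStemFilter(
            new KeywordMarkerTokenFilter(
                new WhitespaceTokenizer(Version.LUCENE_CURRENT,
                    new StringReader("running jumping")),
                keywords));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // "run", then "jumping"
        }
        ts.close();
      }
    }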
Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy)
@@ -42,9 +42,13 @@
* are corrected (see LUCENE-1068)
*
*/
-public class StandardAnalyzer extends Analyzer {
- private Set<?> stopSet;
+public final class StandardAnalyzer extends StopwordAnalyzerBase {
+ /** Default maximum allowed token length */
+ public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+ private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
* See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
@@ -54,8 +58,16 @@
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
 public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
- private final Version matchVersion;
+ /** Builds an analyzer with the given stop words.
+ * @param matchVersion Lucene version to match See {@link
+ * <a href="#version">above</a>}
+ * @param stopWords stop words */
+ public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
+ super(matchVersion, stopWords);
+ replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
+ }
+
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
* @param matchVersion Lucene version to match See {@link
@@ -65,16 +77,6 @@
this(matchVersion, STOP_WORDS_SET);
}
- /** Builds an analyzer with the given stop words.
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
- * @param stopWords stop words */
- public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
- stopSet = stopWords;
- replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
- this.matchVersion = matchVersion;
- }
-
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param matchVersion Lucene version to match See {@link
@@ -93,28 +95,6 @@
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
- /** Constructs a {@link StandardTokenizer} filtered by a {@link
- StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
- tokenStream.setMaxTokenLength(maxTokenLength);
- TokenStream result = new StandardFilter(tokenStream);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopSet);
- return result;
- }
-
- private static final class SavedStreams {
- StandardTokenizer tokenStream;
- TokenStream filteredTokenStream;
- }
-
- /** Default maximum allowed token length */
- public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
- private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
/**
* Set maximum allowed token length. If a token is seen
* that exceeds this length then it is discarded. This
@@ -133,29 +113,20 @@
}
@Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- if (overridesTokenStreamMethod) {
- // LUCENE-1678: force fallback to tokenStream() if we
- // have been subclassed and that subclass overrides
- // tokenStream but not reusableTokenStream
- return tokenStream(fieldName, reader);
- }
- SavedStreams streams = (SavedStreams) getPreviousTokenStream();
- if (streams == null) {
- streams = new SavedStreams();
- setPreviousTokenStream(streams);
- streams.tokenStream = new StandardTokenizer(matchVersion, reader);
- streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
- streams.filteredTokenStream = new LowerCaseFilter(matchVersion,
- streams.filteredTokenStream);
- streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopSet);
- } else {
- streams.tokenStream.reset(reader);
- }
- streams.tokenStream.setMaxTokenLength(maxTokenLength);
-
- streams.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);
-
- return streams.filteredTokenStream;
+ protected TokenStreamComponents createComponents(final String fieldName,
+ final Reader reader) {
+ final StandardTokenizer source = new StandardTokenizer(matchVersion, reader);
+ source.setMaxTokenLength(maxTokenLength);
+ source.setReplaceInvalidAcronym(replaceInvalidAcronym);
+ TokenStream tok = new StandardFilter(source);
+ tok = new LowerCaseFilter(matchVersion, tok);
+ tok = new StopFilter(matchVersion, tok, stopwords);
+ return new TokenStreamComponents(source, tok) {
+ @Override
+ protected boolean reset(final Reader reader) throws IOException {
+ source.setMaxTokenLength(maxTokenLength);
+ return super.reset(reader);
+ }
+ };
}
}
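For context: StandardAnalyzer now inherits the reusable-components pattern via StopwordAnalyzerBase, whose parent builds the tokenizer chain once per thread through createComponents() and calls reset(Reader) on reuse — which is why the overridden reset above re-applies setMaxTokenLength(). A minimal analyzer on the same pattern (a sketch, assuming ReusableAnalyzerBase as the parent class and the two-argument TokenStreamComponents constructor used in this patch):

    import java.io.Reader;

    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    final class MinimalAnalyzer extends ReusableAnalyzerBase {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // source tokenizer plus the end of the filter chain
        Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
        return new TokenStreamComponents(source,
            new LowerCaseFilter(Version.LUCENE_CURRENT, source));
      }
    }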
Index: src/java/org/apache/lucene/analysis/standard/StandardFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardFilter.java (revision 932398)
+++ src/java/org/apache/lucene/analysis/standard/StandardFilter.java (working copy)
@@ -19,27 +19,24 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/** Normalizes tokens extracted with {@link StandardTokenizer}. */
public final class StandardFilter extends TokenFilter {
-
 /** Construct filtering <i>in</i>. */
public StandardFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE];
private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM];
 // this filter uses the type attribute
- private final TypeAttribute typeAtt;
- private final TermAttribute termAtt;
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** Returns the next token in the stream, or null at EOS.
  * <p>Removes <tt>'s</tt> from the end of words.
  * <p>Removes dots from acronyms.
@@ -51,16 +48,16 @@
return false;
}
- char[] buffer = termAtt.termBuffer();
- final int bufferLength = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ final int bufferLength = termAtt.length();
final String type = typeAtt.type();
if (type == APOSTROPHE_TYPE && // remove 's
- bufferLength >= 2 &&
+ bufferLength >= 2 &&
buffer[bufferLength-2] == '\'' &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
// Strip last 2 characters off
- termAtt.setTermLength(bufferLength - 2);
+ termAtt.setLength(bufferLength - 2);
} else if (type == ACRONYM_TYPE) { // remove dots
int upto = 0;
      for(int i=0;i<bufferLength;i++) {
        char c = buffer[i];
        if (c != '.')
          buffer[upto++] = c;
      }
-      termAtt.setTermLength(upto);
+      termAtt.setLength(upto);
    }
Index: src/java/org/apache/lucene/collation/CollationKeyFilter.java
===================================================================
--- src/java/org/apache/lucene/collation/CollationKeyFilter.java (revision 932398)
+++ src/java/org/apache/lucene/collation/CollationKeyFilter.java (working copy)
@@ -74,8 +74,8 @@
 */
public final class CollationKeyFilter extends TokenFilter {
- private Collator collator = null;
- private TermAttribute termAtt;
+ private final Collator collator;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* @param input Source token stream
@@ -83,23 +83,18 @@
public CollationKeyFilter(TokenStream input, Collator collator) {
super(input);
this.collator = collator;
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- char[] termBuffer = termAtt.termBuffer();
- String termText = new String(termBuffer, 0, termAtt.termLength());
- byte[] collationKey = collator.getCollationKey(termText).toByteArray();
+ byte[] collationKey = collator.getCollationKey(termAtt.toString()).toByteArray();
int encodedLength = IndexableBinaryStringTools.getEncodedLength(
collationKey, 0, collationKey.length);
- if (encodedLength > termBuffer.length) {
- termAtt.resizeTermBuffer(encodedLength);
- }
- termAtt.setTermLength(encodedLength);
+ termAtt.resizeBuffer(encodedLength);
+ termAtt.setLength(encodedLength);
IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length,
- termAtt.termBuffer(), 0, encodedLength);
+ termAtt.buffer(), 0, encodedLength);
return true;
} else {
return false;
Index: src/java/org/apache/lucene/queryParser/QueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 932398)
+++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy)
@@ -17,7 +17,7 @@
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
@@ -156,7 +156,7 @@
static public enum Operator { OR, AND }
/** Constructs a query parser.
- * @param matchVersion Lucene version to match. See {@link <a href="#version">above</a>)
+ * @param matchVersion Lucene version to match. See above.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
*/
@@ -522,7 +522,7 @@
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
- TermAttribute termAtt = null;
+ CharTermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
@@ -534,8 +534,8 @@
// success==false if we hit an exception
}
if (success) {
- if (buffer.hasAttribute(TermAttribute.class)) {
- termAtt = buffer.getAttribute(TermAttribute.class);
+ if (buffer.hasAttribute(CharTermAttribute.class)) {
+ termAtt = buffer.getAttribute(CharTermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@@ -581,7 +581,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@@ -596,7 +596,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@@ -619,7 +619,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@@ -659,7 +659,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
Index: src/java/org/apache/lucene/queryParser/QueryParser.jj
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 932398)
+++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy)
@@ -41,7 +41,7 @@
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
@@ -180,7 +180,7 @@
static public enum Operator { OR, AND }
/** Constructs a query parser.
- * @param matchVersion Lucene version to match. See {@link <a href="#version">above</a>)
+ * @param matchVersion Lucene version to match. See above.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
*/
@@ -546,7 +546,7 @@
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
- TermAttribute termAtt = null;
+ CharTermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
@@ -558,8 +558,8 @@
// success==false if we hit an exception
}
if (success) {
- if (buffer.hasAttribute(TermAttribute.class)) {
- termAtt = buffer.getAttribute(TermAttribute.class);
+ if (buffer.hasAttribute(CharTermAttribute.class)) {
+ termAtt = buffer.getAttribute(CharTermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@@ -605,7 +605,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@@ -620,7 +620,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@@ -643,7 +643,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@@ -683,7 +683,7 @@
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 932398)
+++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy)
@@ -11,12 +11,11 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
-import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
Index: src/java/org/apache/lucene/search/QueryTermVector.java
===================================================================
--- src/java/org/apache/lucene/search/QueryTermVector.java (revision 932398)
+++ src/java/org/apache/lucene/search/QueryTermVector.java (working copy)
@@ -28,7 +28,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.TermFreqVector;
/**
@@ -61,11 +61,11 @@
boolean hasMoreTokens = false;
stream.reset();
- TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+ final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
hasMoreTokens = stream.incrementToken();
while (hasMoreTokens) {
- terms.add(termAtt.term());
+ terms.add(termAtt.toString());
hasMoreTokens = stream.incrementToken();
}
processTerms(terms.toArray(new String[terms.size()]));
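On the consumer side (QueryParser, QueryTermVector, and the tests below) the migration is mechanical: term() becomes toString(), which works because CharTermAttribute implements CharSequence. A standalone consumer loop in the new style; WhitespaceTokenizer and the sample text are just placeholders:

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class ConsumerDemo {
      public static void main(String[] args) throws IOException {
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
            new StringReader("hello world"));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          // termAtt is also usable directly as a CharSequence
          System.out.println(termAtt.toString());
        }
        ts.end();
        ts.close();
      }
    }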
Index: src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
===================================================================
--- src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (working copy)
@@ -83,8 +83,8 @@
assertNotNull(output);
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
- assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class));
- TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
+ assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
+ CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = null;
if (startOffsets != null || endOffsets != null || finalOffset != null) {
@@ -108,7 +108,7 @@
for (int i = 0; i < output.length; i++) {
// extra safety to enforce, that the state is not preserved and also assign bogus values
ts.clearAttributes();
- termAtt.setTermBuffer("bogusTerm");
+ termAtt.setEmpty().append("bogusTerm");
if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
if (typeAtt != null) typeAtt.setType("bogusType");
if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
@@ -117,7 +117,7 @@
assertTrue("token "+i+" does not exist", ts.incrementToken());
assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
- assertEquals("term "+i, output[i], termAtt.term());
+ assertEquals("term "+i, output[i], termAtt.toString());
if (startOffsets != null)
assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
if (endOffsets != null)
Index: src/test/org/apache/lucene/analysis/TestAnalyzers.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestAnalyzers.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestAnalyzers.java (working copy)
@@ -24,7 +24,7 @@
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Payload;
public class TestAnalyzers extends BaseTokenStreamTestCase {
@@ -120,26 +120,6 @@
String[] y = StandardTokenizer.TOKEN_TYPES;
}
- private static class MyStandardAnalyzer extends StandardAnalyzer {
- public MyStandardAnalyzer() {
- super(TEST_VERSION_CURRENT);
- }
-
- @Override
- public TokenStream tokenStream(String field, Reader reader) {
- return new WhitespaceAnalyzer(TEST_VERSION_CURRENT).tokenStream(field, reader);
- }
- }
-
- public void testSubclassOverridingOnlyTokenStream() throws Throwable {
- Analyzer a = new MyStandardAnalyzer();
- TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
- // StandardAnalyzer will discard "the" (it's a
- // stopword), by my subclass will not:
- assertTrue(ts.incrementToken());
- assertFalse(ts.incrementToken());
- }
-
private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
@Override
@@ -202,8 +182,8 @@
String highSurEndingLower = "bogustermboguster\ud801";
tokenizer.reset(new StringReader(highSurEndingUpper));
assertTokenStreamContents(filter, new String[] {highSurEndingLower});
- assertTrue(filter.hasAttribute(TermAttribute.class));
- char[] termBuffer = filter.getAttribute(TermAttribute.class).termBuffer();
+ assertTrue(filter.hasAttribute(CharTermAttribute.class));
+ char[] termBuffer = filter.getAttribute(CharTermAttribute.class).buffer();
int length = highSurEndingLower.length();
assertEquals('\ud801', termBuffer[length - 1]);
assertEquals('\udc3e', termBuffer[length]);
Index: src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (working copy)
@@ -17,7 +17,7 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
import java.util.List;
import java.util.ArrayList;
@@ -33,7 +33,7 @@
+" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"));
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
assertTermEquals("Des", filter, termAtt);
assertTermEquals("mot", filter, termAtt);
@@ -1890,7 +1890,7 @@
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(inputText.toString()));
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
 Iterator<String> expectedIter = expectedOutputTokens.iterator();
while (expectedIter.hasNext()) {
assertTermEquals(expectedIter.next(), filter, termAtt);
@@ -1898,8 +1898,8 @@
assertFalse(filter.incrementToken());
}
- void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
+ void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception {
assertTrue(stream.incrementToken());
- assertEquals(expected, termAtt.term());
+ assertEquals(expected, termAtt.toString());
}
}
Index: src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (working copy)
@@ -21,7 +21,7 @@
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
@@ -43,7 +43,7 @@
Document doc = new Document();
TokenStream stream = new TokenStream() {
private int index = 0;
- private TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
@@ -52,7 +52,7 @@
return false;
} else {
clearAttributes();
- termAtt.setTermBuffer(tokens[index++]);
+ termAtt.append(tokens[index++]);
offsetAtt.setOffset(0,0);
return true;
}
@@ -100,11 +100,10 @@
private void checkTokens(TokenStream stream) throws IOException {
int count = 0;
- TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
- assertNotNull(termAtt);
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
while (stream.incrementToken()) {
assertTrue(count < tokens.length);
- assertEquals(tokens[count], termAtt.term());
+ assertEquals(tokens[count], termAtt.toString());
count++;
}
Index: src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java (working copy)
@@ -17,14 +17,14 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase {
public void testU() throws Exception {
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"));
ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream);
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
assertTermEquals("Des", filter, termAtt);
assertTermEquals("mot", filter, termAtt);
assertTermEquals("cles", filter, termAtt);
@@ -103,8 +103,8 @@
assertFalse(filter.incrementToken());
}
- void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
+ void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception {
assertTrue(stream.incrementToken());
- assertEquals(expected, termAtt.term());
+ assertEquals(expected, termAtt.toString());
}
}
Index: src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java (working copy)
@@ -6,7 +6,7 @@
import java.util.Set;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
/**
@@ -53,20 +53,20 @@
public static class LowerCaseFilterMock extends TokenFilter {
- private TermAttribute termAtt;
- private KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public LowerCaseFilterMock(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- if (!keywordAttr.isKeyword())
- termAtt.setTermBuffer(termAtt.term().toLowerCase());
+ if (!keywordAttr.isKeyword()) {
+ final String term = termAtt.toString().toLowerCase();
+ termAtt.setEmpty().append(term);
+ }
return true;
}
return false;
Index: src/test/org/apache/lucene/analysis/TestLengthFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestLengthFilter.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestLengthFilter.java (working copy)
@@ -17,7 +17,7 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
public class TestLengthFilter extends BaseTokenStreamTestCase {
@@ -26,14 +26,14 @@
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
LengthFilter filter = new LengthFilter(stream, 2, 6);
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
assertTrue(filter.incrementToken());
- assertEquals("short", termAtt.term());
+ assertEquals("short", termAtt.toString());
assertTrue(filter.incrementToken());
- assertEquals("ab", termAtt.term());
+ assertEquals("ab", termAtt.toString());
assertTrue(filter.incrementToken());
- assertEquals("foo", termAtt.term());
+ assertEquals("foo", termAtt.toString());
assertFalse(filter.incrementToken());
}
Index: src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java (working copy)
@@ -2,7 +2,7 @@
import java.io.StringReader;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,19 +30,19 @@
TokenStream tokenStream = analyzer.tokenStream("field",
new StringReader(text));
- TermAttribute termAtt = tokenStream.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
assertTrue(tokenStream.incrementToken());
assertEquals("WhitespaceAnalyzer does not lowercase",
"Qwerty",
- termAtt.term());
+ termAtt.toString());
tokenStream = analyzer.tokenStream("special",
new StringReader(text));
- termAtt = tokenStream.getAttribute(TermAttribute.class);
+ termAtt = tokenStream.getAttribute(CharTermAttribute.class);
assertTrue(tokenStream.incrementToken());
assertEquals("SimpleAnalyzer lowercases",
"qwerty",
- termAtt.term());
+ termAtt.toString());
}
}
Index: src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestStopAnalyzer.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestStopAnalyzer.java (working copy)
@@ -18,7 +18,7 @@
*/
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import java.io.StringReader;
@@ -51,10 +51,10 @@
StringReader reader = new StringReader("This is a test of the english stop analyzer");
TokenStream stream = stop.tokenStream("test", reader);
assertTrue(stream != null);
- TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
while (stream.incrementToken()) {
- assertFalse(inValidTokens.contains(termAtt.term()));
+ assertFalse(inValidTokens.contains(termAtt.toString()));
}
}
@@ -67,11 +67,11 @@
StringReader reader = new StringReader("This is a good test of the english stop analyzer");
TokenStream stream = newStop.tokenStream("test", reader);
assertNotNull(stream);
- TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
- String text = termAtt.term();
+ String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
}
@@ -88,11 +88,11 @@
TokenStream stream = newStop.tokenStream("test", reader);
assertNotNull(stream);
int i = 0;
- TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
- String text = termAtt.term();
+ String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
}
Index: src/test/org/apache/lucene/analysis/TestStopFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestStopFilter.java (revision 932398)
+++ src/test/org/apache/lucene/analysis/TestStopFilter.java (working copy)
@@ -17,7 +17,7 @@
*/
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.English;
import org.apache.lucene.util.Version;
@@ -37,11 +37,11 @@
StringReader reader = new StringReader("Now is The Time");
 Set<String> stopWords = new HashSet<String>(Arrays.asList("is", "the", "Time"));
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
- final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+ final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
- assertEquals("Now", termAtt.term());
+ assertEquals("Now", termAtt.toString());
assertTrue(stream.incrementToken());
- assertEquals("The", termAtt.term());
+ assertEquals("The", termAtt.toString());
assertFalse(stream.incrementToken());
}
@@ -49,9 +49,9 @@
StringReader reader = new StringReader("Now is The Time");
Set