Index: src/java/org/apache/lucene/analysis/CachingTokenFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/CachingTokenFilter.java (revision 792427) +++ src/java/org/apache/lucene/analysis/CachingTokenFilter.java (working copy) @@ -22,8 +22,6 @@ import java.util.LinkedList; import java.util.List; -import org.apache.lucene.util.AttributeSource; - /** * This class can be used if the Tokens of a TokenStream * are intended to be consumed more than once. It caches @@ -33,33 +31,16 @@ * {@link TokenStream#reset()}, which repositions the * stream to the first Token. * + * @deprecated Use TODO instead */ public class CachingTokenFilter extends TokenFilter { private List cache; - private Iterator iterator; + private Iterator iterator; public CachingTokenFilter(TokenStream input) { super(input); } - public boolean incrementToken() throws IOException { - if (cache == null) { - // fill cache lazily - cache = new LinkedList(); - fillCache(); - iterator = cache.iterator(); - } - - if (!iterator.hasNext()) { - // the cache is exhausted, return null - return false; - } - // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. 
- AttributeSource state = (AttributeSource) iterator.next(); - state.restoreState(this); - return true; - } - /** @deprecated */ public Token next(final Token reusableToken) throws IOException { assert reusableToken != null; @@ -85,13 +66,6 @@ } } - private void fillCache() throws IOException { - while(input.incrementToken()) { - cache.add(captureState()); - } - } - - /** @deprecated */ private void fillCache(final Token reusableToken) throws IOException { for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) { cache.add(nextToken.clone()); Index: src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java (revision 792427) +++ src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java (working copy) @@ -57,28 +57,6 @@ } else return false; } - - /** @deprecated */ - public final Token next(final Token reusableToken) throws java.io.IOException { - assert reusableToken != null; - Token nextToken = input.next(reusableToken); - if (nextToken != null) { - final char[] buffer = nextToken.termBuffer(); - final int length = nextToken.termLength(); - // If no characters actually require rewriting then we - // just return token as-is: - for(int i=0;i= '\u00c0' && c <= '\uFB06') { - removeAccents(buffer, length); - nextToken.setTermBuffer(output, 0, outputPos); - break; - } - } - return nextToken; - } else - return null; - } /** * To replace accented characters in a String by unaccented equivalents. 
Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/KeywordTokenizer.java (revision 792427) +++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java (working copy) @@ -65,30 +65,6 @@ return false; } - /** @deprecated */ - public Token next(final Token reusableToken) throws IOException { - assert reusableToken != null; - if (!done) { - done = true; - int upto = 0; - reusableToken.clear(); - char[] buffer = reusableToken.termBuffer(); - while (true) { - final int length = input.read(buffer, upto, buffer.length-upto); - if (length == -1) break; - upto += length; - if (upto == buffer.length) - buffer = reusableToken.resizeTermBuffer(1+buffer.length); - } - reusableToken.setTermLength(upto); - reusableToken.setStartOffset(input.correctOffset(0)); - reusableToken.setEndOffset(input.correctOffset(upto)); - - return reusableToken; - } - return null; - } - public void reset(Reader input) throws IOException { super.reset(input); this.done = false; Index: src/java/org/apache/lucene/analysis/LengthFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/LengthFilter.java (revision 792427) +++ src/java/org/apache/lucene/analysis/LengthFilter.java (working copy) @@ -61,24 +61,4 @@ // reached EOS -- return null return false; } - - /** - * Returns the next input Token whose term() is the right len - * @deprecated - */ - public final Token next(final Token reusableToken) throws IOException - { - assert reusableToken != null; - // return the first non-stop word found - for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) - { - int len = nextToken.termLength(); - if (len >= min && len <= max) { - return nextToken; - } - // note: else we ignore it but should we index each part of it? 
- } - // reached EOS -- return null - return null; - } } Index: src/java/org/apache/lucene/analysis/LowerCaseFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/LowerCaseFilter.java (revision 792427) +++ src/java/org/apache/lucene/analysis/LowerCaseFilter.java (working copy) @@ -46,20 +46,4 @@ } else return false; } - - /** @deprecated */ - public final Token next(final Token reusableToken) throws IOException { - assert reusableToken != null; - Token nextToken = input.next(reusableToken); - if (nextToken != null) { - - final char[] buffer = nextToken.termBuffer(); - final int length = nextToken.termLength(); - for(int i=0;i= valSize) - return null; - - reusableToken.clear(); - - final char[] buffer; - switch (valSize) { - case 64: - buffer = reusableToken.resizeTermBuffer(NumericUtils.LONG_BUF_SIZE); - reusableToken.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer)); - reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_64 : TOKEN_TYPE_LOWER_PREC_64); - break; - - case 32: - buffer = reusableToken.resizeTermBuffer(NumericUtils.INT_BUF_SIZE); - reusableToken.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer)); - reusableToken.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC_32 : TOKEN_TYPE_LOWER_PREC_32); - break; - - default: - // should not happen - throw new IllegalArgumentException("valSize must be 32 or 64"); - } - - reusableToken.setPositionIncrement((shift == 0) ? 
1 : 0); - shift += precisionStep; - return reusableToken; - } // @Override public String toString() { Index: src/java/org/apache/lucene/analysis/PorterStemFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/PorterStemFilter.java (revision 792427) +++ src/java/org/apache/lucene/analysis/PorterStemFilter.java (working copy) @@ -57,16 +57,4 @@ termAtt.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength()); return true; } - - /** @deprecated */ - public final Token next(final Token reusableToken) throws IOException { - assert reusableToken != null; - Token nextToken = input.next(reusableToken); - if (nextToken == null) - return null; - - if (stemmer.stem(nextToken.termBuffer(), 0, nextToken.termLength())) - nextToken.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength()); - return nextToken; - } } Index: src/java/org/apache/lucene/analysis/SinkTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/SinkTokenizer.java (revision 792427) +++ src/java/org/apache/lucene/analysis/SinkTokenizer.java (working copy) @@ -22,19 +22,18 @@ import java.util.Iterator; import java.util.List; -import org.apache.lucene.util.AttributeSource; - /** * A SinkTokenizer can be used to cache Tokens for use in an Analyzer * * @see TeeTokenFilter + * @deprecated Use TODO instead * **/ public class SinkTokenizer extends Tokenizer { protected List/**/ lst = new ArrayList/**/(); protected Iterator/**/ iter; - + public SinkTokenizer(List/**/ input) { this.lst = input; if (this.lst == null) this.lst = new ArrayList/**/(); @@ -64,29 +63,9 @@ } /** - * Increments this stream to the next token out of the list of cached tokens - * @throws IOException - */ - public boolean incrementToken() throws IOException { - if (iter == null) iter = lst.iterator(); - // Since this TokenStream can be reset we have to maintain the tokens as immutable - if 
(iter.hasNext()) { - AttributeSource state = (AttributeSource) iter.next(); - state.restoreState(this); - return true; - } - return false; - } - - public void add(AttributeSource source) throws IOException { - lst.add(source); - } - - /** * Returns the next token out of the list of cached tokens * @return The next {@link org.apache.lucene.analysis.Token} in the Sink. * @throws IOException - * @deprecated */ public Token next(final Token reusableToken) throws IOException { assert reusableToken != null; @@ -99,6 +78,8 @@ return null; } + + /** * Override this method to cache only certain tokens, or new tokens based * on the old tokens. Index: src/java/org/apache/lucene/analysis/standard/StandardFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardFilter.java (revision 792427) +++ src/java/org/apache/lucene/analysis/standard/StandardFilter.java (working copy) @@ -73,39 +73,4 @@ return true; } - - /** Returns the next token in the stream, or null at EOS. - *

Removes 's from the end of words. - *

Removes dots from acronyms. - * @deprecated - */ - public final Token next(final Token reusableToken) throws java.io.IOException { - assert reusableToken != null; - Token nextToken = input.next(reusableToken); - - if (nextToken == null) - return null; - - char[] buffer = nextToken.termBuffer(); - final int bufferLength = nextToken.termLength(); - final String type = nextToken.type(); - - if (type == APOSTROPHE_TYPE && // remove 's - bufferLength >= 2 && - buffer[bufferLength-2] == '\'' && - (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) { - // Strip last 2 characters off - nextToken.setTermLength(bufferLength - 2); - } else if (type == ACRONYM_TYPE) { // remove dots - int upto = 0; - for(int i=0;i A Token is an occurrence of a term from the text of a field. It consists of a term's text, the start and end offset of the term in the text of the field, and a type string. @@ -117,11 +121,13 @@

+ @deprecated This class is now deprecated and a new TokenStream API was introduced with Lucene 2.9. + See Javadocs in {@link TokenStream} for further details. @see org.apache.lucene.index.Payload - @deprecated A new TokenStream API was introduced with Lucene 2.9. - See javadocs in {@link TokenStream} for further details. */ -public class Token implements Cloneable { +public class Token extends AttributeImpl + implements Cloneable, TermAttribute, TypeAttribute, PositionIncrementAttribute, + FlagsAttribute, OffsetAttribute, PayloadAttribute { public static final String DEFAULT_TYPE = "word"; @@ -134,7 +140,7 @@ /** * Characters for the term text. * @deprecated This will be made private. Instead, use: - * {@link termBuffer()}, + * {@link #termBuffer()}, * {@link #setTermBuffer(char[], int, int)}, * {@link #setTermBuffer(String)}, or * {@link #setTermBuffer(String, int, int)} @@ -144,28 +150,28 @@ /** * Length of term text in the buffer. * @deprecated This will be made private. Instead, use: - * {@link termLength()}, or @{link setTermLength(int)}. + * {@link #termLength()}, or {@link #setTermLength(int)}. */ int termLength; /** * Start in source text. * @deprecated This will be made private. Instead, use: - * {@link startOffset()}, or @{link setStartOffset(int)}. + * {@link #startOffset()}, or {@link #setStartOffset(int)}. */ int startOffset; /** * End in source text. * @deprecated This will be made private. Instead, use: - * {@link endOffset()}, or @{link setEndOffset(int)}. + * {@link #endOffset()}, or {@link #setEndOffset(int)}. */ int endOffset; /** * The lexical type of the token. * @deprecated This will be made private. Instead, use: - * {@link type()}, or @{link setType(String)}. + * {@link #type()}, or {@link #setType(String)}. */ String type = DEFAULT_TYPE; @@ -173,13 +179,13 @@ /** * @deprecated This will be made private. Instead, use: - * {@link getPayload()}, or @{link setPayload(Payload)}. + * {@link #getPayload()}, or {@link #setPayload(Payload)}. 
*/ Payload payload; /** * @deprecated This will be made private. Instead, use: - * {@link getPositionIncrement()}, or @{link setPositionIncrement(String)}. + * {@link #getPositionIncrement()}, or {@link #setPositionIncrement(int)}. */ int positionIncrement = 1; @@ -561,6 +567,13 @@ public void setEndOffset(int offset) { this.endOffset = offset; } + + /** Set the starting and ending offset. + See {@link #startOffset()} and {@link #endOffset()}. */ + public void setOffset(int startOffset, int endOffset) { + this.startOffset = startOffset; + this.endOffset = endOffset; + } /** Returns this Token's lexical type. Defaults to "word". */ public final String type() { @@ -640,19 +653,15 @@ } public Object clone() { - try { - Token t = (Token)super.clone(); - // Do a deep clone - if (termBuffer != null) { - t.termBuffer = (char[]) termBuffer.clone(); - } - if (payload != null) { - t.setPayload((Payload) payload.clone()); - } - return t; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // shouldn't happen + Token t = (Token)super.clone(); + // Do a deep clone + if (termBuffer != null) { + t.termBuffer = (char[]) termBuffer.clone(); } + if (payload != null) { + t.setPayload((Payload) payload.clone()); + } + return t; } /** Makes a clone, but replaces the term buffer & @@ -862,4 +871,17 @@ type = prototype.type; payload = prototype.payload; } + + /** + * Copies this Token's state into target, which must itself be a Token (the cast fails otherwise). + * The payload is cloned so that this Token and the target do not share one Payload instance. + */ + public void copyTo(AttributeImpl target) { + Token to = (Token) target; + to.reinit(this); + // NOTE(review): reinit appears to hand the payload over by reference; clone it here, as clone() does + if (payload != null) { + to.payload = (Payload) payload.clone(); + } + } } Index: src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java (revision 792427) +++ src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java (working copy) @@ -17,10 +17,7 @@ * limitations under the License. 
*/ -import java.io.Serializable; -import org.apache.lucene.util.Attribute; - /** * This attribute can be used to pass different flags down the tokenizer chain, * e. g. from one TokenFilter to another one. @@ -31,9 +28,7 @@ * We will make our best efforts to keep the APIs backwards-compatible. */ -public class FlagsAttribute extends Attribute implements Cloneable, Serializable { - private int flags = 0; - +public interface FlagsAttribute extends TokenAttribute { /** * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. *

@@ -44,43 +39,10 @@ * * @return The bits */ - public int getFlags() { - return flags; - } + public int getFlags(); /** * @see #getFlags() */ - public void setFlags(int flags) { - this.flags = flags; - } - - public void clear() { - flags = 0; - } - - public String toString() { - return "flags=" + flags; - } - - public boolean equals(Object other) { - if (this == other) { - return true; - } - - if (other instanceof FlagsAttribute) { - return ((FlagsAttribute) other).flags == flags; - } - - return false; - } - - public int hashCode() { - return flags; - } - - public void copyTo(Attribute target) { - FlagsAttribute t = (FlagsAttribute) target; - t.setFlags(flags); - } + public void setFlags(int flags); } Index: src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java (revision 0) @@ -0,0 +1,82 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Serializable; + +import org.apache.lucene.util.AttributeImpl; + +/** + * This attribute can be used to pass different flags down the tokenizer chain, + * e. g. from one TokenFilter to another one. + * + *

+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + + */ +public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable, Serializable { + private int flags = 0; + + /** + * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. + *

+ * + * Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes. + * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s. + * + * + * @return The bits + */ + public int getFlags() { + return flags; + } + + /** + * @see #getFlags() + */ + public void setFlags(int flags) { + this.flags = flags; + } + + public void clear() { + flags = 0; + } + + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other instanceof FlagsAttributeImpl) { + return ((FlagsAttributeImpl) other).flags == flags; + } + + return false; + } + + public int hashCode() { + return flags; + } + + public void copyTo(AttributeImpl target) { + FlagsAttribute t = (FlagsAttribute) target; + t.setFlags(flags); + } +} Index: src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java (revision 792427) +++ src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java (working copy) @@ -17,10 +17,7 @@ * limitations under the License. */ -import java.io.Serializable; -import org.apache.lucene.util.Attribute; - /** * The start and end character offset of a Token. * @@ -29,67 +26,23 @@ * The APIs introduced in these classes with Lucene 2.9 might change in the future. * We will make our best efforts to keep the APIs backwards-compatible. */ -public class OffsetAttribute extends Attribute implements Cloneable, Serializable { - private int startOffset; - private int endOffset; - +public interface OffsetAttribute extends TokenAttribute { /** Returns this Token's starting offset, the position of the first character corresponding to this token in the source text. 
Note that the difference between endOffset() and startOffset() may not be equal to termText.length(), as the term text may have been altered by a stemmer or some other filter. */ - public int startOffset() { - return startOffset; - } + public int startOffset(); /** Set the starting and ending offset. @see #startOffset() and #endOffset()*/ - public void setOffset(int startOffset, int endOffset) { - this.startOffset = startOffset; - this.endOffset = endOffset; - } + public void setOffset(int startOffset, int endOffset); /** Returns this Token's ending offset, one greater than the position of the last character corresponding to this token in the source text. The length of the token in the source text is (endOffset - startOffset). */ - public int endOffset() { - return endOffset; - } - - - public void clear() { - startOffset = 0; - endOffset = 0; - } - - public String toString() { - return "start=" + startOffset + ",end=" + endOffset; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof OffsetAttribute) { - OffsetAttribute o = (OffsetAttribute) other; - return o.startOffset == startOffset && o.endOffset == endOffset; - } - - return false; - } - - public int hashCode() { - int code = startOffset; - code = code * 31 + endOffset; - return code; - } - - public void copyTo(Attribute target) { - OffsetAttribute t = (OffsetAttribute) target; - t.setOffset(startOffset, endOffset); - } + public int endOffset(); } Index: src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (revision 0) @@ -0,0 +1,91 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.util.AttributeImpl; + +/** + * The start and end character offset of a Token. + * + *

+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + */ +public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable, Serializable { + private int startOffset; + private int endOffset; + + /** Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. */ + public int startOffset() { + return startOffset; + } + + + /** Set the starting and ending offset. + @see #startOffset() and #endOffset()*/ + public void setOffset(int startOffset, int endOffset) { + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + + /** Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). 
*/ + public int endOffset() { + return endOffset; + } + + + public void clear() { + startOffset = 0; + endOffset = 0; + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof OffsetAttributeImpl) { + OffsetAttributeImpl o = (OffsetAttributeImpl) other; + return o.startOffset == startOffset && o.endOffset == endOffset; + } + + return false; + } + + public int hashCode() { + int code = startOffset; + code = code * 31 + endOffset; + return code; + } + + public void copyTo(AttributeImpl target) { + OffsetAttribute t = (OffsetAttribute) target; + t.setOffset(startOffset, endOffset); + } +} Index: src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (revision 792427) +++ src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (working copy) @@ -17,10 +17,7 @@ * limitations under the License. */ -import java.io.Serializable; - import org.apache.lucene.index.Payload; -import org.apache.lucene.util.Attribute; /** * The payload of a Token. See also {@link Payload}. @@ -30,80 +27,14 @@ * The APIs introduced in these classes with Lucene 2.9 might change in the future. * We will make our best efforts to keep the APIs backwards-compatible. */ -public class PayloadAttribute extends Attribute implements Cloneable, Serializable { - private Payload payload; - +public interface PayloadAttribute extends TokenAttribute { /** - * Initialize this attribute with no payload. - */ - public PayloadAttribute() {} - - /** - * Initialize this attribute with the given payload. - */ - public PayloadAttribute(Payload payload) { - this.payload = payload; - } - - /** * Returns this Token's payload. */ - public Payload getPayload() { - return this.payload; - } + public Payload getPayload(); /** * Sets this Token's payload. 
*/ - public void setPayload(Payload payload) { - this.payload = payload; - } - - public void clear() { - payload = null; - } - - public String toString() { - if (payload == null) { - return "payload=null"; - } - - return "payload=" + payload.toString(); - } - - public Object clone() { - PayloadAttribute clone = (PayloadAttribute) super.clone(); - if (payload != null) { - clone.payload = (Payload) payload.clone(); - } - return clone; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof PayloadAttribute) { - PayloadAttribute o = (PayloadAttribute) other; - if (o.payload == null || payload == null) { - return o.payload == null && payload == null; - } - - return o.payload.equals(payload); - } - - return false; - } - - public int hashCode() { - return (payload == null) ? 0 : payload.hashCode(); - } - - public void copyTo(Attribute target) { - PayloadAttribute t = (PayloadAttribute) target; - t.setPayload((payload == null) ? null : (Payload) payload.clone()); - } - - + public void setPayload(Payload payload); } Index: src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java (revision 0) @@ -0,0 +1,101 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.AttributeImpl; + +/** + * The payload of a Token. See also {@link Payload}. + * + *

+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + */ +public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable, Serializable { + private Payload payload; + + /** + * Initialize this attribute with no payload. + */ + public PayloadAttributeImpl() {} + + /** + * Initialize this attribute with the given payload. + */ + public PayloadAttributeImpl(Payload payload) { + this.payload = payload; + } + + /** + * Returns this Token's payload. + */ + public Payload getPayload() { + return this.payload; + } + + /** + * Sets this Token's payload. + */ + public void setPayload(Payload payload) { + this.payload = payload; + } + + public void clear() { + payload = null; + } + + public Object clone() { + PayloadAttributeImpl clone = (PayloadAttributeImpl) super.clone(); + if (payload != null) { + clone.payload = (Payload) payload.clone(); + } + return clone; + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof PayloadAttributeImpl) { // test the concrete class: the cast below would throw for other PayloadAttribute implementations + PayloadAttributeImpl o = (PayloadAttributeImpl) other; + if (o.payload == null || payload == null) { + return o.payload == null && payload == null; + } + + return o.payload.equals(payload); + } + + return false; + } + + public int hashCode() { + return (payload == null) ? 0 : payload.hashCode(); + } + + public void copyTo(AttributeImpl target) { + PayloadAttribute t = (PayloadAttribute) target; + t.setPayload((payload == null) ? 
null : (Payload) payload.clone()); + } + + +} Index: src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java (revision 792427) +++ src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java (working copy) @@ -20,7 +20,7 @@ import java.io.Serializable; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; /** The positionIncrement determines the position of this token * relative to the previous Token in a {@link TokenStream}, used in phrase @@ -53,54 +53,15 @@ * * @see org.apache.lucene.index.TermPositions */ -public class PositionIncrementAttribute extends Attribute implements Cloneable, Serializable { - private int positionIncrement = 1; - +public interface PositionIncrementAttribute extends TokenAttribute { /** Set the position increment. The default value is one. * * @param positionIncrement the distance from the prior term */ - public void setPositionIncrement(int positionIncrement) { - if (positionIncrement < 0) - throw new IllegalArgumentException - ("Increment must be zero or greater: " + positionIncrement); - this.positionIncrement = positionIncrement; - } + public void setPositionIncrement(int positionIncrement); /** Returns the position increment of this Token. 
* @see #setPositionIncrement */ - public int getPositionIncrement() { - return positionIncrement; - } - - public void clear() { - this.positionIncrement = 1; - } - - public String toString() { - return "positionIncrement=" + positionIncrement; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof PositionIncrementAttribute) { - return positionIncrement == ((PositionIncrementAttribute) other).positionIncrement; - } - - return false; - } - - public int hashCode() { - return positionIncrement; - } - - public void copyTo(Attribute target) { - PositionIncrementAttribute t = (PositionIncrementAttribute) target; - t.setPositionIncrement(positionIncrement); - } - + public int getPositionIncrement(); } Index: src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java (revision 0) @@ -0,0 +1,102 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.AttributeImpl; + +/** The positionIncrement determines the position of this token + * relative to the previous Token in a {@link TokenStream}, used in phrase + * searching. + * + *

The default value is one. + * + *

Some common uses for this are:

    + * + *
  • Set it to zero to put multiple terms in the same position. This is + * useful if, e.g., a word has multiple stems. Searches for phrases + * including either stem will match. In this case, all but the first stem's + * increment should be set to zero: the increment of the first instance + * should be one. Repeating a token with an increment of zero can also be + * used to boost the scores of matches on that token. + * + *
  • Set it to values greater than one to inhibit exact phrase matches. + * If, for example, one does not want phrases to match across removed stop + * words, then one could build a stop word filter that removes stop words and + * also sets the increment to the number of stop words removed before each + * non-stop word. Then exact phrase queries will only match when the terms + * occur with no intervening stop words. + * + *
+ * + *

+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + * + * @see org.apache.lucene.index.TermPositions + */ +public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable, Serializable { + private int positionIncrement = 1; + + /** Set the position increment. The default value is one. + * + * @param positionIncrement the distance from the prior term + */ + public void setPositionIncrement(int positionIncrement) { + if (positionIncrement < 0) + throw new IllegalArgumentException + ("Increment must be zero or greater: " + positionIncrement); + this.positionIncrement = positionIncrement; + } + + /** Returns the position increment of this Token. + * @see #setPositionIncrement + */ + public int getPositionIncrement() { + return positionIncrement; + } + + public void clear() { + this.positionIncrement = 1; + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof PositionIncrementAttributeImpl) { + return positionIncrement == ((PositionIncrementAttributeImpl) other).positionIncrement; + } + + return false; + } + + public int hashCode() { + return positionIncrement; + } + + public void copyTo(AttributeImpl target) { + PositionIncrementAttribute t = (PositionIncrementAttribute) target; + t.setPositionIncrement(positionIncrement); + } + +} Index: src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java (revision 792427) +++ src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java (working copy) @@ -17,11 +17,7 @@ * limitations under the License. 
*/ -import java.io.Serializable; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Attribute; - /** * The term text of a Token. * @@ -30,12 +26,7 @@ * The APIs introduced in these classes with Lucene 2.9 might change in the future. * We will make our best efforts to keep the APIs backwards-compatible. */ -public class TermAttribute extends Attribute implements Cloneable, Serializable { - private static int MIN_BUFFER_SIZE = 10; - - private char[] termBuffer; - private int termLength; - +public interface TermAttribute extends TokenAttribute { /** Returns the Token's term text. * * This method has a performance penalty @@ -45,38 +36,20 @@ * String, use this method, which is nothing more than * a convenience call to new String(token.termBuffer(), 0, token.termLength()) */ - public String term() { - initTermBuffer(); - return new String(termBuffer, 0, termLength); - } - + public String term(); + /** Copies the contents of buffer, starting at offset for * length characters, into the termBuffer array. * @param buffer the buffer to copy * @param offset the index in the buffer of the first character to copy * @param length the number of characters to copy */ - public void setTermBuffer(char[] buffer, int offset, int length) { - char[] newCharBuffer = growTermBuffer(length); - if (newCharBuffer != null) { - termBuffer = newCharBuffer; - } - System.arraycopy(buffer, offset, termBuffer, 0, length); - termLength = length; - } + public void setTermBuffer(char[] buffer, int offset, int length); /** Copies the contents of buffer into the termBuffer array. 
* @param buffer the buffer to copy */ - public void setTermBuffer(String buffer) { - int length = buffer.length(); - char[] newCharBuffer = growTermBuffer(length); - if (newCharBuffer != null) { - termBuffer = newCharBuffer; - } - buffer.getChars(0, length, termBuffer, 0); - termLength = length; - } + public void setTermBuffer(String buffer); /** Copies the contents of buffer, starting at offset and continuing * for length characters, into the termBuffer array. @@ -84,17 +57,8 @@ * @param offset the index in the buffer of the first character to copy * @param length the number of characters to copy */ - public void setTermBuffer(String buffer, int offset, int length) { - assert offset <= buffer.length(); - assert offset + length <= buffer.length(); - char[] newCharBuffer = growTermBuffer(length); - if (newCharBuffer != null) { - termBuffer = newCharBuffer; - } - buffer.getChars(offset, offset + length, termBuffer, 0); - termLength = length; - } - + public void setTermBuffer(String buffer, int offset, int length); + /** Returns the internal termBuffer character array which * you can then directly alter. If the array is too * small for your token, use {@link @@ -102,10 +66,7 @@ * altering the buffer be sure to call {@link * #setTermLength} to record the number of valid * characters that were placed into the termBuffer. */ - public char[] termBuffer() { - initTermBuffer(); - return termBuffer; - } + public char[] termBuffer(); /** Grows the termBuffer to at least size newSize, preserving the * existing content. Note: If the next operation is to change @@ -117,63 +78,12 @@ * @param newSize minimum size of the new termBuffer * @return newly created termBuffer with length >= newSize */ - public char[] resizeTermBuffer(int newSize) { - char[] newCharBuffer = growTermBuffer(newSize); - if (termBuffer == null) { - // If there were termText, then preserve it. 
- // note that if termBuffer is null then newCharBuffer cannot be null - assert newCharBuffer != null; - termBuffer = newCharBuffer; - } else if (newCharBuffer != null) { - // Note: if newCharBuffer != null then termBuffer needs to grow. - // If there were a termBuffer, then preserve it - System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length); - termBuffer = newCharBuffer; - } - return termBuffer; - } + public char[] resizeTermBuffer(int newSize); - /** Allocates a buffer char[] of at least newSize - * @param newSize minimum size of the buffer - * @return newly created buffer with length >= newSize or null if the current termBuffer is big enough - */ - private char[] growTermBuffer(int newSize) { - if (termBuffer != null) { - if (termBuffer.length >= newSize) - // Already big enough - return null; - else - // Not big enough; create a new array with slight - // over allocation: - return new char[ArrayUtil.getNextSize(newSize)]; - } else { - - // determine the best size - // The buffer is always at least MIN_BUFFER_SIZE - if (newSize < MIN_BUFFER_SIZE) { - newSize = MIN_BUFFER_SIZE; - } - - return new char[newSize]; - } - } - - // TODO: once we remove the deprecated termText() method - // and switch entirely to char[] termBuffer we don't need - // to use this method anymore - private void initTermBuffer() { - if (termBuffer == null) { - termBuffer = new char[MIN_BUFFER_SIZE]; - termLength = 0; - } - } - /** Return number of valid characters (length of the term) * in the termBuffer array. */ - public int termLength() { - return termLength; - } - + public int termLength(); + /** Set number of valid characters (length of the term) in * the termBuffer array. Use this to truncate the termBuffer * or to synchronize with external manipulation of the termBuffer. @@ -181,61 +91,5 @@ * use {@link #resizeTermBuffer(int)} first. 
* @param length the truncated length */ - public void setTermLength(int length) { - initTermBuffer(); - if (length > termBuffer.length) - throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")"); - termLength = length; - } - - public int hashCode() { - initTermBuffer(); - int code = termLength; - code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength); - return code; - } - - public void clear() { - termLength = 0; - } - - public Object clone() { - TermAttribute t = (TermAttribute)super.clone(); - // Do a deep clone - if (termBuffer != null) { - t.termBuffer = (char[]) termBuffer.clone(); - } - return t; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof TermAttribute) { - initTermBuffer(); - TermAttribute o = ((TermAttribute) other); - o.initTermBuffer(); - - for(int i=0;i + * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + */ +public class TermAttributeImpl extends AttributeImpl implements TermAttribute, Cloneable, Serializable { + private static int MIN_BUFFER_SIZE = 10; + + private char[] termBuffer; + private int termLength; + + /** Returns the Token's term text. + * + * This method has a performance penalty + * because the text is stored internally in a char[]. If + * possible, use {@link #termBuffer()} and {@link + * #termLength()} directly instead. If you really need a + * String, use this method, which is nothing more than + * a convenience call to new String(token.termBuffer(), 0, token.termLength()) + */ + public String term() { + initTermBuffer(); + return new String(termBuffer, 0, termLength); + } + + /** Copies the contents of buffer, starting at offset for + * length characters, into the termBuffer array. 
+ * @param buffer the buffer to copy + * @param offset the index in the buffer of the first character to copy + * @param length the number of characters to copy + */ + public void setTermBuffer(char[] buffer, int offset, int length) { + char[] newCharBuffer = growTermBuffer(length); + if (newCharBuffer != null) { + termBuffer = newCharBuffer; + } + System.arraycopy(buffer, offset, termBuffer, 0, length); + termLength = length; + } + + /** Copies the contents of buffer into the termBuffer array. + * @param buffer the buffer to copy + */ + public void setTermBuffer(String buffer) { + int length = buffer.length(); + char[] newCharBuffer = growTermBuffer(length); + if (newCharBuffer != null) { + termBuffer = newCharBuffer; + } + buffer.getChars(0, length, termBuffer, 0); + termLength = length; + } + + /** Copies the contents of buffer, starting at offset and continuing + * for length characters, into the termBuffer array. + * @param buffer the buffer to copy + * @param offset the index in the buffer of the first character to copy + * @param length the number of characters to copy + */ + public void setTermBuffer(String buffer, int offset, int length) { + assert offset <= buffer.length(); + assert offset + length <= buffer.length(); + char[] newCharBuffer = growTermBuffer(length); + if (newCharBuffer != null) { + termBuffer = newCharBuffer; + } + buffer.getChars(offset, offset + length, termBuffer, 0); + termLength = length; + } + + /** Returns the internal termBuffer character array which + * you can then directly alter. If the array is too + * small for your token, use {@link + * #resizeTermBuffer(int)} to increase it. After + * altering the buffer be sure to call {@link + * #setTermLength} to record the number of valid + * characters that were placed into the termBuffer. */ + public char[] termBuffer() { + initTermBuffer(); + return termBuffer; + } + + /** Grows the termBuffer to at least size newSize, preserving the + * existing content. 
Note: If the next operation is to change + * the contents of the term buffer use + * {@link #setTermBuffer(char[], int, int)}, + * {@link #setTermBuffer(String)}, or + * {@link #setTermBuffer(String, int, int)} + * to optimally combine the resize with the setting of the termBuffer. + * @param newSize minimum size of the new termBuffer + * @return newly created termBuffer with length >= newSize + */ + public char[] resizeTermBuffer(int newSize) { + char[] newCharBuffer = growTermBuffer(newSize); + if (termBuffer == null) { + // If there were termText, then preserve it. + // note that if termBuffer is null then newCharBuffer cannot be null + assert newCharBuffer != null; + termBuffer = newCharBuffer; + } else if (newCharBuffer != null) { + // Note: if newCharBuffer != null then termBuffer needs to grow. + // If there were a termBuffer, then preserve it + System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length); + termBuffer = newCharBuffer; + } + return termBuffer; + } + + /** Allocates a buffer char[] of at least newSize + * @param newSize minimum size of the buffer + * @return newly created buffer with length >= newSize or null if the current termBuffer is big enough + */ + private char[] growTermBuffer(int newSize) { + if (termBuffer != null) { + if (termBuffer.length >= newSize) + // Already big enough + return null; + else + // Not big enough; create a new array with slight + // over allocation: + return new char[ArrayUtil.getNextSize(newSize)]; + } else { + + // determine the best size + // The buffer is always at least MIN_BUFFER_SIZE + if (newSize < MIN_BUFFER_SIZE) { + newSize = MIN_BUFFER_SIZE; + } + + return new char[newSize]; + } + } + + // TODO: once we remove the deprecated termText() method + // and switch entirely to char[] termBuffer we don't need + // to use this method anymore + private void initTermBuffer() { + if (termBuffer == null) { + termBuffer = new char[MIN_BUFFER_SIZE]; + termLength = 0; + } + } + + /** Return number of valid 
characters (length of the term) + * in the termBuffer array. */ + public int termLength() { + return termLength; + } + + /** Set number of valid characters (length of the term) in + * the termBuffer array. Use this to truncate the termBuffer + * or to synchronize with external manipulation of the termBuffer. + * Note: to grow the size of the array, + * use {@link #resizeTermBuffer(int)} first. + * @param length the truncated length + */ + public void setTermLength(int length) { + initTermBuffer(); + if (length > termBuffer.length) + throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")"); + termLength = length; + } + + public int hashCode() { + initTermBuffer(); + int code = termLength; + code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength); + return code; + } + + public void clear() { + termLength = 0; + } + + public Object clone() { + TermAttributeImpl t = (TermAttributeImpl)super.clone(); + // Do a deep clone + if (termBuffer != null) { + t.termBuffer = (char[]) termBuffer.clone(); + } + return t; + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof TermAttributeImpl) { + initTermBuffer(); + TermAttributeImpl o = ((TermAttributeImpl) other); + o.initTermBuffer(); + + for(int i=0;i */ -public class TypeAttribute extends Attribute implements Cloneable, Serializable { - private String type; - public static final String DEFAULT_TYPE = "word"; - - public TypeAttribute() { - this(DEFAULT_TYPE); - } - - public TypeAttribute(String type) { - this.type = type; - } - +public interface TypeAttribute extends TokenAttribute { /** Returns this Token's lexical type. Defaults to "word". */ - public String type() { - return type; - } + public String type(); /** Set the lexical type. 
@see #type() */ - public void setType(String type) { - this.type = type; - } - - public void clear() { - type = DEFAULT_TYPE; - } - - public String toString() { - return "type=" + type; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof TypeAttribute) { - return type.equals(((TypeAttribute) other).type); - } - - return false; - } - - public int hashCode() { - return type.hashCode(); - } - - public void copyTo(Attribute target) { - TypeAttribute t = (TypeAttribute) target; - t.setType(new String(type)); - } + public void setType(String type); } Index: src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java (revision 0) @@ -0,0 +1,79 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.util.AttributeImpl; + +/** + * A Token's lexical type. The Default value is "word". + * + *

+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + */ +public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable, Serializable { + private String type; + public static final String DEFAULT_TYPE = "word"; + + public TypeAttributeImpl() { + this(DEFAULT_TYPE); + } + + public TypeAttributeImpl(String type) { + this.type = type; + } + + /** Returns this Token's lexical type. Defaults to "word". */ + public String type() { + return type; + } + + /** Set the lexical type. + @see #type() */ + public void setType(String type) { + this.type = type; + } + + public void clear() { + type = DEFAULT_TYPE; + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof TypeAttributeImpl) { + return type.equals(((TypeAttributeImpl) other).type); + } + + return false; + } + + public int hashCode() { + return type.hashCode(); + } + + public void copyTo(AttributeImpl target) { + TypeAttribute t = (TypeAttribute) target; + t.setType(type); + } +} Index: src/java/org/apache/lucene/analysis/TokenFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/TokenFilter.java (revision 792427) +++ src/java/org/apache/lucene/analysis/TokenFilter.java (working copy) @@ -42,7 +42,7 @@ super(input); this.input = input; } - + /** Close the input TokenStream. */ public void close() throws IOException { input.close(); @@ -53,17 +53,4 @@ super.reset(); input.reset(); } - - public boolean useNewAPI() { - return input.useNewAPI(); - } - - /** - * Sets whether or not to use the new TokenStream API. Settings this - * will apply to this Filter and all TokenStream/Filters upstream. 
- */ - public void setUseNewAPI(boolean use) { - input.setUseNewAPI(use); - } - } Index: src/java/org/apache/lucene/analysis/Tokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/Tokenizer.java (revision 792427) +++ src/java/org/apache/lucene/analysis/Tokenizer.java (working copy) @@ -24,17 +24,10 @@

This is an abstract class.

- NOTE: In order to enable the new API the method - {@link #useNewAPI()} has to be called with useNewAPI=true. - Otherwise the deprecated method {@link #next(Token)} will - be used by Lucene consumers (indexer and queryparser) to - consume the tokens. {@link #next(Token)} will be removed - in Lucene 3.0. -

NOTE: To use the old API subclasses must override {@link #next(Token)}. It's also OK to instead override {@link #next()} but that method is slower compared to {@link #next(Token)}. -

+

NOTE: subclasses overriding {@link #next(Token)} must call {@link Token#clear()}. *

Index: src/java/org/apache/lucene/analysis/TokenStream.java =================================================================== --- src/java/org/apache/lucene/analysis/TokenStream.java (revision 792427) +++ src/java/org/apache/lucene/analysis/TokenStream.java (working copy) @@ -21,8 +21,10 @@ import java.util.Iterator; import org.apache.lucene.index.Payload; -import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.Attribute; // javadocs +import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.analysis.tokenattributes.*; /** A TokenStream enumerates the sequence of tokens, either from fields of a document or from query text. @@ -36,13 +38,13 @@ A new TokenStream API is introduced with Lucene 2.9. Since 2.9 Token is deprecated and the preferred way to store - the information of a token is to use {@link Attribute}s. + the information of a token is to use {@link AttributeImpl}s.

For that reason TokenStream extends {@link AttributeSource} - now. Note that only one instance per {@link Attribute} is + now. Note that only one instance per {@link AttributeImpl} is created and reused for every token. This approach reduces object creations and allows local caching of references to - the {@link Attribute}s. See {@link #incrementToken()} for further details. + the {@link AttributeImpl}s. See {@link #incrementToken()} for further details.

The workflow of the new TokenStream API is as follows:

    @@ -61,18 +63,7 @@ Sometimes it is desirable to capture a current state of a TokenStream, e. g. for buffering purposes (see {@link CachingTokenFilter}, {@link TeeTokenFilter}/{@link SinkTokenizer}). For this usecase - {@link AttributeSource#captureState()} and {@link AttributeSource#restoreState(AttributeSource)} can be used. -

    - NOTE: In order to enable the new API the method - {@link #useNewAPI()} has to be called with useNewAPI=true. - Otherwise the deprecated method {@link #next(Token)} will - be used by Lucene consumers (indexer and queryparser) to - consume the tokens. {@link #next(Token)} will be removed - in Lucene 3.0. -

    - NOTE: To use the old API subclasses must override {@link #next(Token)}. - It's also OK to instead override {@link #next()} but that - method is slower compared to {@link #next(Token)}. + {@link AttributeSource#captureState} and {@link AttributeSource#restoreState} can be used. *

    * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. * The APIs introduced in these classes with Lucene 2.9 might change in the future. @@ -80,9 +71,14 @@ */ public abstract class TokenStream extends AttributeSource { - private static boolean useNewAPIDefault = false; - private boolean useNewAPI = useNewAPIDefault; + + /** @deprecated */ + private TokenWrapper tokenWrapper; + private boolean hasIncrementToken, hasReusableNext, hasNext; + /** @deprecated */ + private static boolean onlyUseNewAPI = false; + protected TokenStream() { super(); } @@ -90,68 +86,102 @@ protected TokenStream(AttributeSource input) { super(input); } - + /** - * Returns whether or not the new TokenStream APIs are used - * by default. - * (see {@link #incrementToken()}, {@link AttributeSource}). + * For extra performance you can globally enable the new {@link #incrementToken} + * API using {@link Attribute}s. There will be a small, but negligible performance + * increase by enabling this, but it only works if all TokenStreams and -Filters + * use the new API and implement {@link #incrementToken}. This setting can only be enabled + * globally. + *

    All core analyzers are compatible with this setting, if you have own + * TokenStreams/-Filters, that are also compatible, enable this. + *

    When enabled, tokenization may throw {@link UnsupportedOperationException}s, + * if the whole tokenizer chain is not compatible. + *

    The default is false, so there is the fallback to the old API available. + * @deprecated This setting will be true per default in Lucene 3.0, + * when {@link #incrementToken} is abstract and must be always implemented. */ - public static boolean useNewAPIDefault() { - return useNewAPIDefault; + public static void setOnlyUseNewAPI(boolean onlyUseNewAPI) { + TokenStream.onlyUseNewAPI = onlyUseNewAPI; } - - /** - * Use this API to enable or disable the new TokenStream API. - * by default. Can be overridden by calling {@link #setUseNewAPI(boolean)}. - * (see {@link #incrementToken()}, {@link AttributeSource}). - *

    - * If set to true, the indexer will call {@link #incrementToken()} - * to consume Tokens from this stream. - *

    - * If set to false, the indexer will call {@link #next(Token)} - * instead. + + /** Returns if only the new API is used. + * @see #setOnlyUseNewAPI */ - public static void setUseNewAPIDefault(boolean use) { - useNewAPIDefault = use; + public static boolean getOnlyUseNewAPI() { + return onlyUseNewAPI; } - /** - * Returns whether or not the new TokenStream APIs are used - * for this stream. - * (see {@link #incrementToken()}, {@link AttributeSource}). - */ - public boolean useNewAPI() { - return useNewAPI; - } + protected void initialize() { + // TODO: remove this when old API is removed + tokenWrapper = null; + + // use reflection to find out, which methods this class has overridden + try { + final Class[] EMPTY_PARAMS=new Class[0]; + hasIncrementToken = this.getClass().getMethod("incrementToken",EMPTY_PARAMS).getDeclaringClass() != TokenStream.class; + hasNext = this.getClass().getMethod("next",EMPTY_PARAMS).getDeclaringClass() != TokenStream.class; + hasReusableNext = this.getClass().getMethod("next",new Class[]{Token.class}).getDeclaringClass() != TokenStream.class; + } catch (NoSuchMethodException e) { + // should not happen + throw new RuntimeException(e); + } + + /*System.out.println(this.getClass()+" supports: incrementToken()="+ + hasIncrementToken+" next()="+hasNext+" next(Token)="+hasReusableNext);*/ + // if no method was overridden, this should fail early to prevent stack overflows + + // a TokenStream subclass must at least implement one of the methods! + if (!(hasIncrementToken || hasNext || hasReusableNext)) + throw new UnsupportedOperationException(getClass().getName()+" does not implement any of incrementToken(), next(Token), next()."); + + if (onlyUseNewAPI) { + + // to only use the new API, at least incrementToken must be implemented! 
+ if (!hasIncrementToken) + throw new UnsupportedOperationException(getClass().getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI."); + + } else { - /** - * Use this API to enable or disable the new TokenStream API - * for this stream. Overrides {@link #setUseNewAPIDefault(boolean)}. - * (see {@link #incrementToken()}, {@link AttributeSource}). - *

    - * If set to true, the indexer will call {@link #incrementToken()} - * to consume Tokens from this stream. - *

    - * If set to false, the indexer will call {@link #next(Token)} - * instead. - *

    - * NOTE: All streams and filters in one chain must use the - * same API. - */ - public void setUseNewAPI(boolean use) { - useNewAPI = use; + // initialize the wrapper instance + addAttributeImpl(new TokenWrapper()); + if ( + // check that the basic attributes are all TokenWrapper instances, + // and no one registered another instance before. + getAttribute(TermAttribute.class) instanceof TokenWrapper && + getAttribute(TypeAttribute.class) instanceof TokenWrapper && + getAttribute(PositionIncrementAttribute.class) instanceof TokenWrapper && + getAttribute(FlagsAttribute.class) instanceof TokenWrapper && + getAttribute(OffsetAttribute.class) instanceof TokenWrapper && + getAttribute(PayloadAttribute.class) instanceof TokenWrapper + ) { + // get any attribute (it's always the same, a Token) + tokenWrapper = (TokenWrapper) getAttribute(TermAttribute.class); + } + + } } + + /** @deprecated */ + private void checkTokenWrapper() { + if (tokenWrapper == null) throw new UnsupportedOperationException( + "The basic token attributes are not implemented by the default TokenWrapper instance, "+ + "because either onlyUseNewAPI==true or you have registered own Attribute instances. "+ + "In this case, all TokenStreams and TokenFilters in the chain must implement incrementToken() "+ + "and consumers must only call incrementToken()." + ); + } /** * Consumers (e. g. the indexer) use this method to advance the stream * to the next token. Implementing classes must implement this method - * and update the appropriate {@link Attribute}s with content of the + * and update the appropriate {@link AttributeImpl}s with content of the * next token. *

    * This method is called for every token of a document, so an efficient * implementation is crucial for good performance. To avoid calls to * {@link #addAttribute(Class)} and {@link #getAttribute(Class)} and - * downcasts, references to all {@link Attribute}s that this stream uses + * downcasts, references to all {@link AttributeImpl}s that this stream uses * should be retrieved during instantiation. *

    * To make sure that filters and consumers know which attributes are available @@ -164,29 +194,21 @@ * Note that this method will be defined abstract in Lucene 3.0. */ public boolean incrementToken() throws IOException { - // subclasses must implement this method; will be made abstract in Lucene 3.0 - return false; + checkTokenWrapper(); + + final Token token; + if (hasReusableNext) { + token = next(tokenWrapper.delegate); + } else { + assert hasNext; + token = next(); + } + if (token == null) return false; + tokenWrapper.delegate = token; + return true; } /** Returns the next token in the stream, or null at EOS. - * @deprecated The returned Token is a "full private copy" (not - * re-used across calls to next()) but will be slower - * than calling {@link #next(Token)} instead.. */ - public Token next() throws IOException { - final Token reusableToken = new Token(); - Token nextToken = next(reusableToken); - - if (nextToken != null) { - Payload p = nextToken.getPayload(); - if (p != null) { - nextToken.setPayload((Payload) p.clone()); - } - } - - return nextToken; - } - - /** Returns the next token in the stream, or null at EOS. * When possible, the input Token should be used as the * returned Token (this gives fastest tokenization * performance), but this is not required and a new Token @@ -215,14 +237,42 @@ * good idea to assert that it is not null.) * @return next token in the stream or null if end-of-stream was hit * @deprecated The new {@link #incrementToken()} and {@link AttributeSource} - * APIs should be used instead. See also {@link #useNewAPI()}. + * APIs should be used instead. */ public Token next(final Token reusableToken) throws IOException { - // We don't actually use inputToken, but still add this assert assert reusableToken != null; - return next(); + checkTokenWrapper(); + + if (hasIncrementToken) { + tokenWrapper.delegate = reusableToken; + return incrementToken() ? 
tokenWrapper.delegate : null; + } else { + assert hasNext; + final Token token = next(); + if (token == null) return null; + tokenWrapper.delegate = token; + return token; + } } + /** Returns the next token in the stream, or null at EOS. + * @deprecated The returned Token is a "full private copy" (not + * re-used across calls to next()) but will be slower + * than calling {@link #next(Token)} instead. */ + public Token next() throws IOException { + checkTokenWrapper(); + + if (hasIncrementToken) { + return incrementToken() ? ((Token) tokenWrapper.delegate.clone()) : null; + } else { + assert hasReusableNext; + final Token token = next(tokenWrapper.delegate); + if (token == null) return null; + tokenWrapper.delegate = token; + return (Token) token.clone(); + } + } + /** Resets this stream to the beginning. This is an * optional operation, so subclasses may or may not * implement this method. Reset() is not needed for @@ -259,5 +309,5 @@ sb.append(')'); return sb.toString(); } - + } Index: src/java/org/apache/lucene/analysis/TokenWrapper.java =================================================================== --- src/java/org/apache/lucene/analysis/TokenWrapper.java (revision 0) +++ src/java/org/apache/lucene/analysis/TokenWrapper.java (revision 0) @@ -0,0 +1,163 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.AttributeImpl; + +/** + * This class wraps a Token and supplies a single attribute instance + * where the delegate token can be replaced. + * @deprecated Will be removed, when old TokenStream API is removed. 
+ */ +final class TokenWrapper extends AttributeImpl + implements Cloneable, TermAttribute, TypeAttribute, PositionIncrementAttribute, + FlagsAttribute, OffsetAttribute, PayloadAttribute { + + Token delegate; + + TokenWrapper() { + this(new Token()); + } + + TokenWrapper(Token delegate) { + this.delegate = delegate; + } + + // TermAttribute: + + public String term() { + return delegate.term(); + } + + public void setTermBuffer(char[] buffer, int offset, int length) { + delegate.setTermBuffer(buffer, offset, length); + } + + public void setTermBuffer(String buffer) { + delegate.setTermBuffer(buffer); + } + + public void setTermBuffer(String buffer, int offset, int length) { + delegate.setTermBuffer(buffer, offset, length); + } + + public char[] termBuffer() { + return delegate.termBuffer(); + } + + public char[] resizeTermBuffer(int newSize) { + return delegate.resizeTermBuffer(newSize); + } + + public int termLength() { + return delegate.termLength(); + } + + public void setTermLength(int length) { + delegate.setTermLength(length); + } + + // TypeAttribute: + + public String type() { + return delegate.type(); + } + + public void setType(String type) { + delegate.setType(type); + } + + public void setPositionIncrement(int positionIncrement) { + delegate.setPositionIncrement(positionIncrement); + } + + public int getPositionIncrement() { + return delegate.getPositionIncrement(); + } + + // FlagsAttribute + + public int getFlags() { + return delegate.getFlags(); + } + + public void setFlags(int flags) { + delegate.setFlags(flags); + } + + // OffsetAttribute + + public int startOffset() { + return delegate.startOffset(); + } + + public void setOffset(int startOffset, int endOffset) { + delegate.setOffset(startOffset, endOffset); + } + + public int endOffset() { + return delegate.endOffset(); + } + + // PayloadAttribute + public Payload getPayload() { + return delegate.getPayload(); + } + + public void setPayload(Payload payload) { + delegate.setPayload(payload); + } + 
+ // TokenAttribute + + public void clear() { + delegate.clear(); + } + + // AttributeImpl + + public String toString() { + return delegate.toString(); + } + + public int hashCode() { + return delegate.hashCode(); + } + + public boolean equals(Object other) { + if (other instanceof TokenWrapper) { + return ((TokenWrapper) other).delegate.equals(this.delegate); + } + return false; + } + + public Object clone() { + return new TokenWrapper((Token) delegate.clone()); + } + + public void copyTo(AttributeImpl target) { + ((TokenWrapper) target).delegate.reinit(this.delegate); + } +} Property changes on: src\java\org\apache\lucene\analysis\TokenWrapper.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/index/DocInverterPerField.java =================================================================== --- src/java/org/apache/lucene/index/DocInverterPerField.java (revision 792427) +++ src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) @@ -20,7 +20,6 @@ import java.io.IOException; import java.io.Reader; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -83,7 +82,6 @@ final int valueLength = stringValue.length(); perThread.singleTokenTokenStream.reinit(stringValue, 0, valueLength); fieldState.attributeSource = perThread.singleTokenTokenStream; - perThread.localTokenStream.reset(); consumer.start(field); boolean success = false; @@ -132,21 +130,15 @@ try { int offsetEnd = fieldState.offset-1; - boolean useNewTokenStreamAPI = stream.useNewAPI(); - Token localToken = null; - - if (useNewTokenStreamAPI) { - fieldState.attributeSource = stream; - } else { - fieldState.attributeSource = perThread.localTokenStream; - localToken = 
perThread.localToken; - } - - consumer.start(field); + boolean hasMoreTokens = stream.incrementToken(); + fieldState.attributeSource = stream; + OffsetAttribute offsetAttribute = (OffsetAttribute) fieldState.attributeSource.addAttribute(OffsetAttribute.class); PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class); + consumer.start(field); + for(;;) { // If we hit an exception in stream.next below @@ -155,15 +147,9 @@ // non-aborting and (above) this one document // will be marked as deleted, but still // consume a docID - Token token = null; - if (useNewTokenStreamAPI) { - if (!stream.incrementToken()) break; - } else { - token = stream.next(localToken); - if (token == null) break; - perThread.localTokenStream.set(token); - } + if (!hasMoreTokens) break; + final int posIncr = posIncrAttribute.getPositionIncrement(); fieldState.position += posIncr; if (allowMinus1Position || fieldState.position > 0) { @@ -194,6 +180,8 @@ docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens"); break; } + + hasMoreTokens = stream.incrementToken(); } fieldState.offset = offsetEnd+1; } finally { Index: src/java/org/apache/lucene/index/DocInverterPerThread.java =================================================================== --- src/java/org/apache/lucene/index/DocInverterPerThread.java (revision 792427) +++ src/java/org/apache/lucene/index/DocInverterPerThread.java (working copy) @@ -19,15 +19,9 @@ import java.io.IOException; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import 
org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.util.Attribute; /** This is a DocFieldConsumer that inverts each field, * separately, from a Document, and accepts a @@ -37,10 +31,8 @@ final DocInverter docInverter; final InvertedDocConsumerPerThread consumer; final InvertedDocEndConsumerPerThread endConsumer; - final Token localToken = new Token(); //TODO: change to SingleTokenTokenStream after Token was removed final SingleTokenTokenStream singleTokenTokenStream = new SingleTokenTokenStream(); - final BackwardsCompatibilityStream localTokenStream = new BackwardsCompatibilityStream(); static class SingleTokenTokenStream extends TokenStream { TermAttribute termAttribute; @@ -55,75 +47,12 @@ termAttribute.setTermBuffer(stringValue); offsetAttribute.setOffset(startOffset, endOffset); } - } - - /** This stream wrapper is only used to maintain backwards compatibility with the - * old TokenStream API and can be removed in Lucene 3.0 - * @deprecated - */ - static class BackwardsCompatibilityStream extends TokenStream { - private Token token; - - TermAttribute termAttribute = new TermAttribute() { - public String term() { - return token.term(); - } - - public char[] termBuffer() { - return token.termBuffer(); - } - - public int termLength() { - return token.termLength(); - } - }; - OffsetAttribute offsetAttribute = new OffsetAttribute() { - public int startOffset() { - return token.startOffset(); - } - - public int endOffset() { - return token.endOffset(); - } - }; - PositionIncrementAttribute positionIncrementAttribute = new PositionIncrementAttribute() { - public int getPositionIncrement() { - return token.getPositionIncrement(); - } - }; - - FlagsAttribute flagsAttribute = new FlagsAttribute() { - public int getFlags() { - return token.getFlags(); - } - }; - - PayloadAttribute payloadAttribute = new PayloadAttribute() { - public Payload getPayload() { - return 
token.getPayload(); - } - }; - - TypeAttribute typeAttribute = new TypeAttribute() { - public String type() { - return token.type(); - } - }; - - BackwardsCompatibilityStream() { - attributes.put(TermAttribute.class, termAttribute); - attributes.put(OffsetAttribute.class, offsetAttribute); - attributes.put(PositionIncrementAttribute.class, positionIncrementAttribute); - attributes.put(FlagsAttribute.class, flagsAttribute); - attributes.put(PayloadAttribute.class, payloadAttribute); - attributes.put(TypeAttribute.class, typeAttribute); + // this is a dummy, to not throw an UOE because this class does not implement any iteration method + public boolean incrementToken() { + throw new UnsupportedOperationException(); } - - public void set(Token token) { - this.token = token; - } - }; + } final DocumentsWriter.DocState docState; Index: src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 792427) +++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -531,67 +531,42 @@ PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; - org.apache.lucene.analysis.Token reusableToken = null; - org.apache.lucene.analysis.Token nextToken = null; - - - boolean useNewAPI = TokenStream.useNewAPIDefault(); - - if (useNewAPI) { - boolean success = false; - try { - buffer.reset(); - success = true; - } catch (IOException e) { - // success==false if we hit an exception + boolean success = false; + try { + buffer.reset(); + success = true; + } catch (IOException e) { + // success==false if we hit an exception + } + if (success) { + if (buffer.hasAttribute(TermAttribute.class)) { + termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class); } - if (success) { - if (buffer.hasAttribute(TermAttribute.class)) { - termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class); - } - if 
(buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class); - } + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class); } - } else { - reusableToken = new org.apache.lucene.analysis.Token(); } int positionCount = 0; boolean severalTokensAtSamePosition = false; - if (useNewAPI) { - if (termAtt != null) { - try { - while (buffer.incrementToken()) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - } else { - severalTokensAtSamePosition = true; - } + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; } - } catch (IOException e) { - // ignore + hasMoreTokens = buffer.incrementToken(); } + } catch (IOException e) { + // ignore } - } else { - while (true) { - try { - nextToken = buffer.next(reusableToken); - } - catch (IOException e) { - nextToken = null; - } - if (nextToken == null) - break; - numTokens++; - if (nextToken.getPositionIncrement() != 0) - positionCount += nextToken.getPositionIncrement(); - else - severalTokensAtSamePosition = true; - } } try { // rewind the buffer stream @@ -609,16 +584,9 @@ else if (numTokens == 1) { String term = null; try { - - if (useNewAPI) { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - term = termAtt.term(); - } else { - nextToken = buffer.next(reusableToken); - assert nextToken != null; - term = nextToken.term(); - } + boolean hasNext = buffer.incrementToken(); + assert hasNext == 
true; + term = termAtt.term(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } @@ -631,15 +599,9 @@ for (int i = 0; i < numTokens; i++) { String term = null; try { - if (useNewAPI) { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - term = termAtt.term(); - } else { - nextToken = buffer.next(reusableToken); - assert nextToken != null; - term = nextToken.term(); - } + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.term(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } @@ -660,18 +622,11 @@ String term = null; int positionIncrement = 1; try { - if (useNewAPI) { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - term = termAtt.term(); - if (posIncrAtt != null) { - positionIncrement = posIncrAtt.getPositionIncrement(); - } - } else { - nextToken = buffer.next(reusableToken); - assert nextToken != null; - term = nextToken.term(); - positionIncrement = nextToken.getPositionIncrement(); + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.term(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens @@ -707,19 +662,11 @@ int positionIncrement = 1; try { - if (useNewAPI) { - - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - term = termAtt.term(); - if (posIncrAtt != null) { - positionIncrement = posIncrAtt.getPositionIncrement(); - } - } else { - nextToken = buffer.next(reusableToken); - assert nextToken != null; - term = nextToken.term(); - positionIncrement = nextToken.getPositionIncrement(); + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.term(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of 
tokens Index: src/java/org/apache/lucene/search/QueryTermVector.java =================================================================== --- src/java/org/apache/lucene/search/QueryTermVector.java (revision 792427) +++ src/java/org/apache/lucene/search/QueryTermVector.java (working copy) @@ -27,7 +27,6 @@ import java.util.Map; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.index.TermFreqVector; @@ -59,17 +58,15 @@ { List terms = new ArrayList(); try { - if (stream.useNewAPI()) { - stream.reset(); - TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); - while (stream.incrementToken()) { - terms.add(termAtt.term()); - } - } else { - final Token reusableToken = new Token(); - for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { - terms.add(nextToken.term()); - } + boolean hasMoreTokens = false; + + stream.reset(); + TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); + + hasMoreTokens = stream.incrementToken(); + while (hasMoreTokens) { + terms.add(termAtt.term()); + hasMoreTokens = stream.incrementToken(); } processTerms((String[])terms.toArray(new String[terms.size()])); } catch (IOException e) { Index: src/java/org/apache/lucene/util/Attribute.java =================================================================== --- src/java/org/apache/lucene/util/Attribute.java (revision 792427) +++ src/java/org/apache/lucene/util/Attribute.java (working copy) @@ -1,95 +1,5 @@ package org.apache.lucene.util; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +public interface Attribute { -import java.io.Serializable; - -/** - * Base class for Attributes that can be added to a - * {@link org.apache.lucene.util.AttributeSource}. - *

    - * Attributes are used to add data in a dynamic, yet type-safe way to a source - * of usually streamed objects, e. g. a {@link org.apache.lucene.analysis.TokenStream}. - *

    - * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. - * The APIs introduced in these classes with Lucene 2.9 might change in the future. - * We will make our best efforts to keep the APIs backwards-compatible. - */ -public abstract class Attribute implements Cloneable, Serializable { - /** - * Clears the values in this Attribute and resets it to its - * default value. - */ - public abstract void clear(); - - /** - * Subclasses must implement this method and should follow a syntax - * similar to this one: - * - *

    -   *   public String toString() {
    -   *     return "start=" + startOffset + ",end=" + endOffset;
    -   *   }
    -   * 
    - */ - public abstract String toString(); - - /** - * Subclasses must implement this method and should compute - * a hashCode similar to this: - *
    -   *   public int hashCode() {
    -   *     int code = startOffset;
    -   *     code = code * 31 + endOffset;
    -   *     return code;
    -   *   }
    -   * 
    - * - * see also {@link #equals(Object)} - */ - public abstract int hashCode(); - - /** - * All values used for computation of {@link #hashCode()} - * should be checked here for equality. - * - * see also {@link Object#equals(Object)} - */ - public abstract boolean equals(Object other); - - /** - * Copies the values from this Attribute into the passed-in - * target attribute. The type of the target must match the type - * of this attribute. - */ - public abstract void copyTo(Attribute target); - - /** - * Shallow clone. Subclasses must override this if they - * need to clone any members deeply, - */ - public Object clone() { - Object clone = null; - try { - clone = super.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // shouldn't happen - } - return clone; - } } Index: src/java/org/apache/lucene/util/AttributeImpl.java =================================================================== --- src/java/org/apache/lucene/util/AttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/util/AttributeImpl.java (revision 0) @@ -0,0 +1,123 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Serializable; +import java.lang.reflect.Field; + +/** + * Base class for Attributes that can be added to a + * {@link org.apache.lucene.util.AttributeSource}. + *

    + * Attributes are used to add data in a dynamic, yet type-safe way to a source + * of usually streamed objects, e. g. a {@link org.apache.lucene.analysis.TokenStream}. + *

    + * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + */ +public abstract class AttributeImpl implements Cloneable, Serializable { + /** + * Clears the values in this Attribute and resets it to its + * default value. + */ + public abstract void clear(); + + /** + * The default implementation of this method accesses all declared + * fields of this object and prints the values in the following syntax: + * + *

    +   *   public String toString() {
    +   *     return "start=" + startOffset + ",end=" + endOffset;
    +   *   }
    +   * 
    + * + * This method may be overridden by subclasses. + */ + public String toString() { + StringBuffer buffer = new StringBuffer(); + Class clazz = this.getClass(); + Field[] fields = clazz.getDeclaredFields(); + try { + for (int i = 0; i < fields.length; i++) { + Field f = fields[i]; + f.setAccessible(true); + Object value = f.get(this); + if (value == null) { + buffer.append(f.getName() + "=null"); + } else { + buffer.append(f.getName() + "=" + value); + } + if (i < fields.length - 1) { + buffer.append(','); + } + } + } catch (IllegalAccessException e) { + // this should never happen, because we're just accessing fields + // from 'this' + throw new RuntimeException(e); + } + + return buffer.toString(); + } + + /** + * Subclasses must implement this method and should compute + * a hashCode similar to this: + *
    +   *   public int hashCode() {
    +   *     int code = startOffset;
    +   *     code = code * 31 + endOffset;
    +   *     return code;
    +   *   }
    +   * 
    + * + * see also {@link #equals(Object)} + */ + public abstract int hashCode(); + + /** + * All values used for computation of {@link #hashCode()} + * should be checked here for equality. + * + * see also {@link Object#equals(Object)} + */ + public abstract boolean equals(Object other); + + /** + * Copies the values from this Attribute into the passed-in + * target attribute. The type of the target must match the type + * of this attribute. + */ + public abstract void copyTo(AttributeImpl target); + + /** + * Shallow clone. Subclasses must override this if they + * need to clone any members deeply, + */ + public Object clone() { + Object clone = null; + try { + clone = super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); // shouldn't happen + } + return clone; + } +} Index: src/java/org/apache/lucene/util/AttributeSource.java =================================================================== --- src/java/org/apache/lucene/util/AttributeSource.java (revision 792427) +++ src/java/org/apache/lucene/util/AttributeSource.java (working copy) @@ -19,13 +19,14 @@ import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.Map; +import java.util.Set; import org.apache.lucene.analysis.TokenStream; - /** - * An AttributeSource contains a list of different {@link Attribute}s, + * An AttributeSource contains a list of different {@link AttributeImpl}s, * and methods to add and get them. There can only be a single instance * of an attribute in the same AttributeSource instance. This is ensured * by passing in the actual type of the Attribute (Class<Attribute>) to @@ -39,7 +40,103 @@ * We will make our best efforts to keep the APIs backwards-compatible.
    */ public class AttributeSource { + public static abstract class AttributeFactory { + public abstract AttributeImpl createAttributeInstance(Class attClass); + + public static final AttributeFactory DefaultAttributeFactory = new AttributeFactory() { + public AttributeImpl createAttributeInstance(Class attClass) { + try { + return (AttributeImpl) resolveClassName(attClass.getName()).newInstance(); + } catch (InstantiationException e) { + throw new IllegalArgumentException("Could not instantiate class " + attClass); + } catch (IllegalAccessException e) { + throw new IllegalArgumentException("Could not instantiate class " + attClass); + } + } + + protected Class resolveClassName(String attClassName) { + try { + attClassName += "Impl"; + return Class.forName(attClassName); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("Could not find implementing class " + attClassName); + } + } + }; + } + + public static final class State implements Cloneable { + private AttributeImpl attribute; + private State next; + + public Object clone() { + State clone = new State(); + clone.attribute = (AttributeImpl) attribute.clone(); + + if (next != null) { + clone.next = (State) next.clone(); + } + + return clone; + } + } + + private State currentState; + + private void computeCurrentState() { + // find all unique AttributeImpls + Set impls = new LinkedHashSet(); + impls.addAll(attributes.values()); + Iterator it = impls.iterator(); + currentState = new State(); + + State c = currentState; + c.attribute = (AttributeImpl) it.next(); + while (it.hasNext()) { + c.next = new State(); + c = c.next; + c.attribute = (AttributeImpl) it.next(); + } + } + + public State captureState() { + if (currentState == null) { + computeCurrentState(); + } + return (State) this.currentState.clone(); + } + /** + * Restores this state by copying the values of all attributes + * that this state contains into the attributes of the targetStream. 
+ * The targetStream must contain a corresponding instance for each argument + * contained in this state. + *

    + * Note that this method does not affect attributes of the targetStream + * that are not contained in this state. In other words, if for example + * the targetStream contains an OffsetAttribute, but this state doesn't, then + * the value of the OffsetAttribute remains unchanged. It might be desirable to + * reset its value to the default, in which case the caller should first + * call {@link TokenStream#clearAttributes()} on the targetStream. + */ + public void restoreState(State state) { + if (currentState == null) { + computeCurrentState(); + } + + State source = state; + State target = currentState; + + do { + assert source != null && target != null && source.attribute.getClass() == target.attribute.getClass(); + source.attribute.copyTo(target.attribute); + source = source.next; + target = target.next; + } while (source != null); + } + + + /** * An AttributeAcceptor defines only a single method {@link #accept(Class)}. * It can be used for e. g. buffering purposes to specify which attributes * to buffer. @@ -56,13 +153,20 @@ public boolean accept(Class attClass) {return true;} }; + private AttributeFactory factory = AttributeFactory.DefaultAttributeFactory; + /** * Holds the Class<Attribute> -> Attribute mapping */ protected Map attributes; + protected void initialize() { + + } + public AttributeSource() { this.attributes = new LinkedHashMap(); + initialize(); } public AttributeSource(AttributeSource input) { @@ -70,8 +174,17 @@ throw new IllegalArgumentException("input AttributeSource must not be null"); } this.attributes = input.attributes; + initialize(); } + public void setAttributeFactory(AttributeFactory factory) { + this.factory = factory; + } + + public AttributeFactory getAttributeFactory() { + return this.factory; + } + /** Returns an iterator that iterates the attributes * in the same order they were added in. 
*/ @@ -79,24 +192,39 @@ return attributes.values().iterator(); } + public void addAttributeImpl(AttributeImpl att) { + // find all interfaces that this attribute instance implements + // and that extend the Attribute interface + Class clazz = att.getClass(); + do { + Class[] interfaces = clazz.getInterfaces(); + for (int i = 0; i < interfaces.length; i++) { + Class curInterface = interfaces[i]; + if (Attribute.class.isAssignableFrom(curInterface)) { + // Attribute is a superinterface of this interface + if (!attributes.containsKey(curInterface)) { + // invalidate state to force recomputation in captureState() + this.currentState = null; + attributes.put(curInterface, att); + } + } + } + + clazz = clazz.getSuperclass(); + } while (clazz != null); + } + /** * The caller must pass in a Class<? extends Attribute> value. * This method first checks if an instance of that class is * already in this AttributeSource and returns it. Otherwise a * new instance is created, added to this AttributeSource and returned. 
*/ - public Attribute addAttribute(Class attClass) { - Attribute att = (Attribute) attributes.get(attClass); + public AttributeImpl addAttribute(Class attClass) { + AttributeImpl att = (AttributeImpl) attributes.get(attClass); if (att == null) { - try { - att = (Attribute) attClass.newInstance(); - } catch (InstantiationException e) { - throw new IllegalArgumentException("Could not instantiate class " + attClass); - } catch (IllegalAccessException e) { - throw new IllegalArgumentException("Could not instantiate class " + attClass); - } - - attributes.put(attClass, att); + att = this.factory.createAttributeInstance(attClass); + addAttributeImpl(att); } return att; } @@ -121,8 +249,8 @@ * @throws IllegalArgumentException if this AttributeSource does not contain the * Attribute */ - public Attribute getAttribute(Class attClass) { - Attribute att = (Attribute) this.attributes.get(attClass); + public AttributeImpl getAttribute(Class attClass) { + AttributeImpl att = (AttributeImpl) this.attributes.get(attClass); if (att == null) { throw new IllegalArgumentException("This token does not have the attribute '" + attClass + "'."); } @@ -132,12 +260,12 @@ /** * Resets all Attributes in this AttributeSource by calling - * {@link Attribute#clear()} on each Attribute. + * {@link AttributeImpl#clear()} on each Attribute. */ public void clearAttributes() { - Iterator it = getAttributesIterator(); + Iterator it = this.attributes.values().iterator(); while (it.hasNext()) { - ((Attribute) it.next()).clear(); + ((AttributeImpl) it.next()).clear(); } } @@ -145,27 +273,16 @@ * Captures the current state of the passed in TokenStream. *

    * This state will contain all of the passed in TokenStream's - * {@link Attribute}s. If only a subset of the attributes is needed - * please use {@link #captureState(AttributeAcceptor)} + * {@link AttributeImpl}s which the {@link AttributeAcceptor} accepts. */ - public AttributeSource captureState() { - return captureState(AllAcceptor); - } - - /** - * Captures the current state of the passed in TokenStream. - *

    - * This state will contain all of the passed in TokenStream's - * {@link Attribute}s which the {@link AttributeAcceptor} accepts. - */ public AttributeSource captureState(AttributeAcceptor acceptor) { AttributeSource state = new AttributeSource(); Iterator it = getAttributesIterator(); while(it.hasNext()) { - Attribute att = (Attribute) it.next(); + AttributeImpl att = (AttributeImpl) it.next(); if (acceptor.accept(att.getClass())) { - Attribute clone = (Attribute) att.clone(); + AttributeImpl clone = (AttributeImpl) att.clone(); state.attributes.put(att.getClass(), clone); } } @@ -173,28 +290,6 @@ return state; } - /** - * Restores this state by copying the values of all attributes - * that this state contains into the attributes of the targetStream. - * The targetStream must contain a corresponding instance for each argument - * contained in this state. - *

    - * Note that this method does not affect attributes of the targetStream - * that are not contained in this state. In other words, if for example - * the targetStream contains an OffsetAttribute, but this state doesn't, then - * the value of the OffsetAttribute remains unchanged. It might be desirable to - * reset its value to the default, in which case the caller should first - * call {@link TokenStream#clearAttributes()} on the targetStream. - */ - public void restoreState(AttributeSource target) { - Iterator it = getAttributesIterator(); - while (it.hasNext()) { - Attribute att = (Attribute) it.next(); - Attribute targetAtt = target.getAttribute(att.getClass()); - att.copyTo(targetAtt); - } - } - public int hashCode() { int code = 0; if (hasAttributes()) { @@ -228,7 +323,7 @@ while (it.hasNext()) { Class attName = it.next().getClass(); - Attribute otherAtt = (Attribute) other.attributes.get(attName); + AttributeImpl otherAtt = (AttributeImpl) other.attributes.get(attName); if (otherAtt == null || !otherAtt.equals(attributes.get(attName))) { return false; } Index: src/test/org/apache/lucene/analysis/TestNumericTokenStream.java =================================================================== --- src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (revision 792427) +++ src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (working copy) @@ -27,9 +27,8 @@ static final long lvalue = 4573245871874382L; static final int ivalue = 123456; - public void testLongStreamNewAPI() throws Exception { + public void testLongStream() throws Exception { final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue); - stream.setUseNewAPI(true); // use getAttribute to test if attributes really exist, if not an IAE will be throwed final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); for (int shift=0; shift<64; shift+=precisionStep) { @@ -38,21 +37,9 @@ } assertFalse("No more tokens available", 
stream.incrementToken()); } - - public void testLongStreamOldAPI() throws Exception { - final NumericTokenStream stream=new NumericTokenStream(precisionStep).setLongValue(lvalue); - stream.setUseNewAPI(false); - Token tok=new Token(); - for (int shift=0; shift<64; shift+=precisionStep) { - assertNotNull("New token is available", tok=stream.next(tok)); - assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), tok.term()); - } - assertNull("No more tokens available", stream.next(tok)); - } - public void testIntStreamNewAPI() throws Exception { + public void testIntStream() throws Exception { final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue); - stream.setUseNewAPI(true); // use getAttribute to test if attributes really exist, if not an IAE will be throwed final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); for (int shift=0; shift<32; shift+=precisionStep) { @@ -62,17 +49,6 @@ assertFalse("No more tokens available", stream.incrementToken()); } - public void testIntStreamOldAPI() throws Exception { - final NumericTokenStream stream=new NumericTokenStream(precisionStep).setIntValue(ivalue); - stream.setUseNewAPI(false); - Token tok=new Token(); - for (int shift=0; shift<32; shift+=precisionStep) { - assertNotNull("New token is available", tok=stream.next(tok)); - assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), tok.term()); - } - assertNull("No more tokens available", stream.next(tok)); - } - public void testNotInitialized() throws Exception { final NumericTokenStream stream=new NumericTokenStream(precisionStep); @@ -83,21 +59,12 @@ // pass } - stream.setUseNewAPI(true); try { stream.incrementToken(); fail("incrementToken() should not succeed."); } catch (IllegalStateException e) { // pass } - - stream.setUseNewAPI(false); - try { - stream.next(new Token()); - fail("next() should not succeed."); - } catch 
(IllegalStateException e) { - // pass - } } } Index: src/test/org/apache/lucene/analysis/TestTeeTokenFilter.java =================================================================== --- src/test/org/apache/lucene/analysis/TestTeeTokenFilter.java (revision 792427) +++ src/test/org/apache/lucene/analysis/TestTeeTokenFilter.java (working copy) @@ -18,9 +18,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.English; import org.apache.lucene.util.LuceneTestCase; @@ -43,8 +40,7 @@ super(s); } - protected void setUp() throws Exception { - super.setUp(); + protected void setUp() { tokens1 = new String[]{"The", "quick", "Burgundy", "Fox", "jumped", "over", "the", "lazy", "Red", "Dogs"}; tokens2 = new String[]{"The", "Lazy", "Dogs", "should", "stay", "on", "the", "porch"}; buffer1 = new StringBuffer(); @@ -66,29 +62,24 @@ public void test() throws IOException { SinkTokenizer sink1 = new SinkTokenizer(null) { - public void add(AttributeSource a) throws IOException { - TermAttribute termAtt = null; - if (a.hasAttribute(TermAttribute.class)) { - termAtt = (TermAttribute) a.getAttribute(TermAttribute.class); + public void add(Token t) { + if (t != null && t.term().equalsIgnoreCase("The")) { + super.add(t); } - if (termAtt != null && termAtt.term().equalsIgnoreCase("The")) { - super.add(a); - } } }; TokenStream source = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())), sink1); int i = 0; - TermAttribute termAtt = (TermAttribute) source.getAttribute(TermAttribute.class); - while (source.incrementToken()) { - assertTrue(termAtt.term() + " is not equal to " + tokens1[i], termAtt.term().equals(tokens1[i]) == true); + final Token reusableToken = new Token(); + 
for (Token nextToken = source.next(reusableToken); nextToken != null; nextToken = source.next(reusableToken)) { + assertTrue(nextToken.term() + " is not equal to " + tokens1[i], nextToken.term().equals(tokens1[i]) == true); i++; } assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length); assertTrue("sink1 Size: " + sink1.getTokens().size() + " is not: " + 2, sink1.getTokens().size() == 2); i = 0; - termAtt = (TermAttribute) sink1.getAttribute(TermAttribute.class); - while (sink1.incrementToken()) { - assertTrue(termAtt.term() + " is not equal to " + "The", termAtt.term().equalsIgnoreCase("The") == true); + for (Token token = sink1.next(reusableToken); token != null; token = sink1.next(reusableToken)) { + assertTrue(token.term() + " is not equal to " + "The", token.term().equalsIgnoreCase("The") == true); i++; } assertTrue(i + " does not equal: " + sink1.getTokens().size(), i == sink1.getTokens().size()); @@ -96,67 +87,55 @@ public void testMultipleSources() throws Exception { SinkTokenizer theDetector = new SinkTokenizer(null) { - public void add(AttributeSource a) throws IOException { - TermAttribute termAtt = null; - if (a.hasAttribute(TermAttribute.class)) { - termAtt = (TermAttribute) a.getAttribute(TermAttribute.class); + public void add(Token t) { + if (t != null && t.term().equalsIgnoreCase("The")) { + super.add(t); } - if (termAtt != null && termAtt.term().equalsIgnoreCase("The")) { - super.add(a); - } } }; - SinkTokenizer dogDetector = new SinkTokenizer(null) { - public void add(AttributeSource a) throws IOException { - TermAttribute termAtt = null; - if (a.hasAttribute(TermAttribute.class)) { - termAtt = (TermAttribute) a.getAttribute(TermAttribute.class); + SinkTokenizer dogDetector = new SinkTokenizer(null) { + public void add(Token t) { + if (t != null && t.term().equalsIgnoreCase("Dogs")) { + super.add(t); } - if (termAtt != null && termAtt.term().equalsIgnoreCase("Dogs")) { - super.add(a); - } } }; TokenStream source1 = new 
CachingTokenFilter(new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())), theDetector), dogDetector)); TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer2.toString())), theDetector), dogDetector); int i = 0; - TermAttribute termAtt = (TermAttribute) source1.getAttribute(TermAttribute.class); - while (source1.incrementToken()) { - assertTrue(termAtt.term() + " is not equal to " + tokens1[i], termAtt.term().equals(tokens1[i]) == true); + final Token reusableToken = new Token(); + for (Token nextToken = source1.next(reusableToken); nextToken != null; nextToken = source1.next(reusableToken)) { + assertTrue(nextToken.term() + " is not equal to " + tokens1[i], nextToken.term().equals(tokens1[i]) == true); i++; } assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length); assertTrue("theDetector Size: " + theDetector.getTokens().size() + " is not: " + 2, theDetector.getTokens().size() == 2); assertTrue("dogDetector Size: " + dogDetector.getTokens().size() + " is not: " + 1, dogDetector.getTokens().size() == 1); i = 0; - termAtt = (TermAttribute) source2.getAttribute(TermAttribute.class); - while (source2.incrementToken()) { - assertTrue(termAtt.term() + " is not equal to " + tokens2[i], termAtt.term().equals(tokens2[i]) == true); + for (Token nextToken = source2.next(reusableToken); nextToken != null; nextToken = source2.next(reusableToken)) { + assertTrue(nextToken.term() + " is not equal to " + tokens2[i], nextToken.term().equals(tokens2[i]) == true); i++; } assertTrue(i + " does not equal: " + tokens2.length, i == tokens2.length); assertTrue("theDetector Size: " + theDetector.getTokens().size() + " is not: " + 4, theDetector.getTokens().size() == 4); assertTrue("dogDetector Size: " + dogDetector.getTokens().size() + " is not: " + 2, dogDetector.getTokens().size() == 2); i = 0; - termAtt = (TermAttribute) theDetector.getAttribute(TermAttribute.class); - 
while (theDetector.incrementToken()) { - assertTrue(termAtt.term() + " is not equal to " + "The", termAtt.term().equalsIgnoreCase("The") == true); + for (Token nextToken = theDetector.next(reusableToken); nextToken != null; nextToken = theDetector.next(reusableToken)) { + assertTrue(nextToken.term() + " is not equal to " + "The", nextToken.term().equalsIgnoreCase("The") == true); i++; } assertTrue(i + " does not equal: " + theDetector.getTokens().size(), i == theDetector.getTokens().size()); i = 0; - termAtt = (TermAttribute) dogDetector.getAttribute(TermAttribute.class); - while (dogDetector.incrementToken()) { - assertTrue(termAtt.term() + " is not equal to " + "Dogs", termAtt.term().equalsIgnoreCase("Dogs") == true); + for (Token nextToken = dogDetector.next(reusableToken); nextToken != null; nextToken = dogDetector.next(reusableToken)) { + assertTrue(nextToken.term() + " is not equal to " + "Dogs", nextToken.term().equalsIgnoreCase("Dogs") == true); i++; } assertTrue(i + " does not equal: " + dogDetector.getTokens().size(), i == dogDetector.getTokens().size()); source1.reset(); TokenStream lowerCasing = new LowerCaseFilter(source1); i = 0; - termAtt = (TermAttribute) lowerCasing.getAttribute(TermAttribute.class); - while (lowerCasing.incrementToken()) { - assertTrue(termAtt.term() + " is not equal to " + tokens1[i].toLowerCase(), termAtt.term().equals(tokens1[i].toLowerCase()) == true); + for (Token nextToken = lowerCasing.next(reusableToken); nextToken != null; nextToken = lowerCasing.next(reusableToken)) { + assertTrue(nextToken.term() + " is not equal to " + tokens1[i].toLowerCase(), nextToken.term().equals(tokens1[i].toLowerCase()) == true); i++; } assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length); @@ -178,20 +157,21 @@ } //make sure we produce the same tokens ModuloSinkTokenizer sink = new ModuloSinkTokenizer(tokCount[k], 100); + final Token reusableToken = new Token(); TokenStream stream = new TeeTokenFilter(new StandardFilter(new 
StandardTokenizer(new StringReader(buffer.toString()))), sink); - while (stream.incrementToken()) { + while (stream.next(reusableToken) != null) { } stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), 100); List tmp = new ArrayList(); - while (stream.incrementToken()) { - tmp.add(stream.captureState()); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + tmp.add(nextToken.clone()); } List sinkList = sink.getTokens(); assertTrue("tmp Size: " + tmp.size() + " is not: " + sinkList.size(), tmp.size() == sinkList.size()); for (int i = 0; i < tmp.size(); i++) { - AttributeSource tfTok = (AttributeSource) tmp.get(i); - AttributeSource sinkTok = (AttributeSource) sinkList.get(i); - assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true); + Token tfTok = (Token) tmp.get(i); + Token sinkTok = (Token) sinkList.get(i); + assertTrue(tfTok.term() + " is not equal to " + sinkTok.term() + " at token: " + i, tfTok.term().equals(sinkTok.term()) == true); } //simulate two fields, each being analyzed once, for 20 documents @@ -200,14 +180,12 @@ long start = System.currentTimeMillis(); for (int i = 0; i < 20; i++) { stream = new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))); - PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class); - while (stream.incrementToken()) { - tfPos += posIncrAtt.getPositionIncrement(); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + tfPos += nextToken.getPositionIncrement(); } stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), modCounts[j]); - posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class); - while (stream.incrementToken()) { 
- tfPos += posIncrAtt.getPositionIncrement(); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + tfPos += nextToken.getPositionIncrement(); } } long finish = System.currentTimeMillis(); @@ -218,15 +196,13 @@ for (int i = 0; i < 20; i++) { sink = new ModuloSinkTokenizer(tokCount[k], modCounts[j]); stream = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), sink); - PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class); - while (stream.incrementToken()) { - sinkPos += posIncrAtt.getPositionIncrement(); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + sinkPos += nextToken.getPositionIncrement(); } //System.out.println("Modulo--------"); stream = sink; - posIncrAtt = (PositionIncrementAttribute) stream.getAttribute(PositionIncrementAttribute.class); - while (stream.incrementToken()) { - sinkPos += posIncrAtt.getPositionIncrement(); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + sinkPos += nextToken.getPositionIncrement(); } } finish = System.currentTimeMillis(); @@ -252,15 +228,15 @@ int count = 0; //return every 100 tokens - public boolean incrementToken() throws IOException { - boolean hasNext; - for (hasNext = input.incrementToken(); - hasNext && count % modCount != 0; - hasNext = input.incrementToken()) { + public Token next(final Token reusableToken) throws IOException { + Token nextToken = null; + for (nextToken = input.next(reusableToken); + nextToken != null && count % modCount != 0; + nextToken = input.next(reusableToken)) { count++; } count++; - return hasNext; + return nextToken; } } @@ -274,9 +250,9 @@ lst = new ArrayList(numToks % mc); } - public void add(AttributeSource a) throws IOException { - if (a != null && count % modCount == 0) { - 
super.add(a); + public void add(Token t) { + if (t != null && count % modCount == 0) { + super.add(t); } count++; } Index: src/test/org/apache/lucene/index/TestDocumentWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 792427) +++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy) @@ -141,11 +141,11 @@ public TokenStream tokenStream(String fieldName, Reader reader) { return new TokenFilter(new WhitespaceTokenizer(reader)) { boolean first=true; - AttributeSource state; + AttributeSource.State state; public boolean incrementToken() throws IOException { if (state != null) { - state.restoreState(this); + restoreState(state); payloadAtt.setPayload(null); posIncrAtt.setPositionIncrement(0); termAtt.setTermBuffer(new char[]{'b'}, 0, 1); Index: src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 792427) +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.SinkTokenizer; +import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; @@ -3520,47 +3521,21 @@ } } - private static class MyAnalyzer extends Analyzer { - - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream s = new WhitespaceTokenizer(reader); - s.addAttribute(PositionIncrementAttribute.class); - return s; - } - - } - // LUCENE-1255 public void testNegativePositions() throws Throwable { SinkTokenizer tokens = new SinkTokenizer(); - tokens.addAttribute(TermAttribute.class); - tokens.addAttribute(PositionIncrementAttribute.class); + Token t = new Token(); + t.setTermBuffer("a"); + 
t.setPositionIncrement(0); + tokens.add(t); + t.setTermBuffer("b"); + t.setPositionIncrement(1); + tokens.add(t); + t.setTermBuffer("c"); + tokens.add(t); - AttributeSource state = new AttributeSource(); - TermAttribute termAtt = (TermAttribute) state.addAttribute(TermAttribute.class); - PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class); - termAtt.setTermBuffer("a"); - posIncrAtt.setPositionIncrement(0); - tokens.add(state); - - state = new AttributeSource(); - termAtt = (TermAttribute) state.addAttribute(TermAttribute.class); - posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class); - - termAtt.setTermBuffer("b"); - posIncrAtt.setPositionIncrement(1); - tokens.add(state); - - state = new AttributeSource(); - termAtt = (TermAttribute) state.addAttribute(TermAttribute.class); - posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class); - - termAtt.setTermBuffer("c"); - posIncrAtt.setPositionIncrement(1); - tokens.add(state); - MockRAMDirectory dir = new MockRAMDirectory(); - IndexWriter w = new IndexWriter(dir, new MyAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); + IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.add(new Field("field", tokens)); w.addDocument(doc); Index: src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (revision 792427) +++ src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (working copy) @@ -320,6 +320,10 @@ public Token next(final Token reusableToken) { return null; } + + public boolean incrementToken() { + return false; + } } } Index: src/test/org/apache/lucene/util/LuceneTestCase.java 
=================================================================== --- src/test/org/apache/lucene/util/LuceneTestCase.java (revision 792427) +++ src/test/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -44,7 +44,6 @@ protected void setUp() throws Exception { ConcurrentMergeScheduler.setTestMode(); - TokenStream.setUseNewAPIDefault(true); } protected void tearDown() throws Exception {