Index: src/java/org/apache/lucene/analysis/CachingTokenFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/CachingTokenFilter.java (revision 785337) +++ src/java/org/apache/lucene/analysis/CachingTokenFilter.java (working copy) @@ -55,8 +55,8 @@ return false; } // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. - AttributeSource state = (AttributeSource) iterator.next(); - state.restoreState(this); + AttributeSource.State state = (AttributeSource.State) iterator.next(); + restoreState(state); return true; } Index: src/java/org/apache/lucene/analysis/SinkTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/SinkTokenizer.java (revision 785337) +++ src/java/org/apache/lucene/analysis/SinkTokenizer.java (working copy) @@ -71,15 +71,16 @@ if (iter == null) iter = lst.iterator(); // Since this TokenStream can be reset we have to maintain the tokens as immutable if (iter.hasNext()) { - AttributeSource state = (AttributeSource) iter.next(); - state.restoreState(this); + AttributeSource.State state = (AttributeSource.State) iter.next(); + this.restoreState(state); return true; } return false; } public void add(AttributeSource source) throws IOException { - lst.add(source); + AttributeSource.State state = source.captureState(); + lst.add(state); } /** Index: src/java/org/apache/lucene/analysis/TeeTokenFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/TeeTokenFilter.java (revision 785337) +++ src/java/org/apache/lucene/analysis/TeeTokenFilter.java (working copy) @@ -20,7 +20,10 @@ import java.io.IOException; import java.util.Iterator; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; + /** * Works in conjunction with the SinkTokenizer to provide the ability to set aside tokens * that have already been 
analyzed. This is useful in situations where multiple fields share @@ -63,13 +66,13 @@ this.sink = sink; Iterator it = getAttributesIterator(); while (it.hasNext()) { - sink.addAttribute(it.next().getClass()); + sink.addAttributeImpl((AttributeImpl) it.next()); } } public boolean incrementToken() throws IOException { if (input.incrementToken()) { - sink.add(captureState()); + sink.add(this); return true; } return false; Index: src/java/org/apache/lucene/analysis/Token.java =================================================================== --- src/java/org/apache/lucene/analysis/Token.java (revision 785337) +++ src/java/org/apache/lucene/analysis/Token.java (working copy) @@ -17,9 +17,16 @@ * limitations under the License. */ +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.index.Payload; import org.apache.lucene.index.TermPositions; // for javadoc import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.AttributeImpl; /** This class is now deprecated and a new TokenStream API was introduced with Lucene 2.9. @@ -118,10 +125,10 @@
@see org.apache.lucene.index.Payload - @deprecated A new TokenStream API was introduced with Lucene 2.9. - See javadocs in {@link TokenStream} for further details. */ -public class Token implements Cloneable { +public class Token extends AttributeImpl + implements Cloneable, TermAttribute, TypeAttribute, PositionIncrementAttribute, + FlagsAttribute, OffsetAttribute, PayloadAttribute { public static final String DEFAULT_TYPE = "word"; @@ -561,6 +568,13 @@ public void setEndOffset(int offset) { this.endOffset = offset; } + + /** Set the starting and ending offset. + @see #startOffset() and #endOffset()*/ + public void setOffset(int startOffset, int endOffset) { + this.startOffset = startOffset; + this.endOffset = endOffset; + } /** Returns this Token's lexical type. Defaults to "word". */ public final String type() { @@ -640,19 +654,15 @@ } public Object clone() { - try { - Token t = (Token)super.clone(); - // Do a deep clone - if (termBuffer != null) { - t.termBuffer = (char[]) termBuffer.clone(); - } - if (payload != null) { - t.setPayload((Payload) payload.clone()); - } - return t; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // shouldn't happen + Token t = (Token)super.clone(); + // Do a deep clone + if (termBuffer != null) { + t.termBuffer = (char[]) termBuffer.clone(); } + if (payload != null) { + t.setPayload((Payload) payload.clone()); + } + return t; } /** Makes a clone, but replaces the term buffer & @@ -862,4 +872,9 @@ type = prototype.type; payload = prototype.payload; } + + public void copyTo(AttributeImpl target) { + Token to = (Token) target; + to.reinit(this); + } } Index: src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java (revision 785337) +++ src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java (working copy) @@ -17,10 +17,7 @@ * 
limitations under the License. */ -import java.io.Serializable; -import org.apache.lucene.util.Attribute; - /** * This attribute can be used to pass different flags down the tokenizer chain, * e. g. from one TokenFilter to another one. @@ -31,9 +28,7 @@ * We will make our best efforts to keep the APIs backwards-compatible. */ -public class FlagsAttribute extends Attribute implements Cloneable, Serializable { - private int flags = 0; - +public interface FlagsAttribute extends TokenAttribute { /** * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. * @@ -44,43 +39,10 @@ * * @return The bits */ - public int getFlags() { - return flags; - } + public int getFlags(); /** * @see #getFlags() */ - public void setFlags(int flags) { - this.flags = flags; - } - - public void clear() { - flags = 0; - } - - public String toString() { - return "flags=" + flags; - } - - public boolean equals(Object other) { - if (this == other) { - return true; - } - - if (other instanceof FlagsAttribute) { - return ((FlagsAttribute) other).flags == flags; - } - - return false; - } - - public int hashCode() { - return flags; - } - - public void copyTo(Attribute target) { - FlagsAttribute t = (FlagsAttribute) target; - t.setFlags(flags); - } + public void setFlags(int flags); } Index: src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java (revision 0) @@ -0,0 +1,82 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.util.AttributeImpl; + +/** + * This attribute can be used to pass different flags down the tokenizer chain, + * e. g. from one TokenFilter to another one. + * + *+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + + */ +public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable, Serializable { + private int flags = 0; + + /** + * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. + *
+ * + * Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes. + * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s. + * + * + * @return The bits + */ + public int getFlags() { + return flags; + } + + /** + * @see #getFlags() + */ + public void setFlags(int flags) { + this.flags = flags; + } + + public void clear() { + flags = 0; + } + + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other instanceof FlagsAttributeImpl) { + return ((FlagsAttributeImpl) other).flags == flags; + } + + return false; + } + + public int hashCode() { + return flags; + } + + public void copyTo(AttributeImpl target) { + FlagsAttribute t = (FlagsAttribute) target; + t.setFlags(flags); + } +} Index: src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java (revision 785337) +++ src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java (working copy) @@ -17,10 +17,7 @@ * limitations under the License. */ -import java.io.Serializable; -import org.apache.lucene.util.Attribute; - /** * The start and end character offset of a Token. * @@ -29,67 +26,23 @@ * The APIs introduced in these classes with Lucene 2.9 might change in the future. * We will make our best efforts to keep the APIs backwards-compatible. */ -public class OffsetAttribute extends Attribute implements Cloneable, Serializable { - private int startOffset; - private int endOffset; - +public interface OffsetAttribute extends TokenAttribute { /** Returns this Token's starting offset, the position of the first character corresponding to this token in the source text. 
Note that the difference between endOffset() and startOffset() may not be equal to termText.length(), as the term text may have been altered by a stemmer or some other filter. */ - public int startOffset() { - return startOffset; - } + public int startOffset(); /** Set the starting and ending offset. @see #startOffset() and #endOffset()*/ - public void setOffset(int startOffset, int endOffset) { - this.startOffset = startOffset; - this.endOffset = endOffset; - } + public void setOffset(int startOffset, int endOffset); /** Returns this Token's ending offset, one greater than the position of the last character corresponding to this token in the source text. The length of the token in the source text is (endOffset - startOffset). */ - public int endOffset() { - return endOffset; - } - - - public void clear() { - startOffset = 0; - endOffset = 0; - } - - public String toString() { - return "start=" + startOffset + ",end=" + endOffset; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof OffsetAttribute) { - OffsetAttribute o = (OffsetAttribute) other; - return o.startOffset == startOffset && o.endOffset == endOffset; - } - - return false; - } - - public int hashCode() { - int code = startOffset; - code = code * 31 + endOffset; - return code; - } - - public void copyTo(Attribute target) { - OffsetAttribute t = (OffsetAttribute) target; - t.setOffset(startOffset, endOffset); - } + public int endOffset(); } Index: src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (revision 0) @@ -0,0 +1,91 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.util.AttributeImpl; + +/** + * The start and end character offset of a Token. + * + *+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + */ +public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable, Serializable { + private int startOffset; + private int endOffset; + + /** Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. */ + public int startOffset() { + return startOffset; + } + + + /** Set the starting and ending offset. 
+ @see #startOffset() and #endOffset()*/ + public void setOffset(int startOffset, int endOffset) { + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + + /** Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). */ + public int endOffset() { + return endOffset; + } + + + public void clear() { + startOffset = 0; + endOffset = 0; + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof OffsetAttributeImpl) { + OffsetAttributeImpl o = (OffsetAttributeImpl) other; + return o.startOffset == startOffset && o.endOffset == endOffset; + } + + return false; + } + + public int hashCode() { + int code = startOffset; + code = code * 31 + endOffset; + return code; + } + + public void copyTo(AttributeImpl target) { + OffsetAttribute t = (OffsetAttribute) target; + t.setOffset(startOffset, endOffset); + } +} Index: src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (revision 785337) +++ src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (working copy) @@ -17,10 +17,7 @@ * limitations under the License. */ -import java.io.Serializable; - import org.apache.lucene.index.Payload; -import org.apache.lucene.util.Attribute; /** * The payload of a Token. See also {@link Payload}. @@ -30,80 +27,14 @@ * The APIs introduced in these classes with Lucene 2.9 might change in the future. * We will make our best efforts to keep the APIs backwards-compatible. 
*/ -public class PayloadAttribute extends Attribute implements Cloneable, Serializable { - private Payload payload; - +public interface PayloadAttribute extends TokenAttribute { /** - * Initialize this attribute with no payload. - */ - public PayloadAttribute() {} - - /** - * Initialize this attribute with the given payload. - */ - public PayloadAttribute(Payload payload) { - this.payload = payload; - } - - /** * Returns this Token's payload. */ - public Payload getPayload() { - return this.payload; - } + public Payload getPayload(); /** * Sets this Token's payload. */ - public void setPayload(Payload payload) { - this.payload = payload; - } - - public void clear() { - payload = null; - } - - public String toString() { - if (payload == null) { - return "payload=null"; - } - - return "payload=" + payload.toString(); - } - - public Object clone() { - PayloadAttribute clone = (PayloadAttribute) super.clone(); - if (payload != null) { - clone.payload = (Payload) payload.clone(); - } - return clone; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof PayloadAttribute) { - PayloadAttribute o = (PayloadAttribute) other; - if (o.payload == null || payload == null) { - return o.payload == null && payload == null; - } - - return o.payload.equals(payload); - } - - return false; - } - - public int hashCode() { - return (payload == null) ? 0 : payload.hashCode(); - } - - public void copyTo(Attribute target) { - PayloadAttribute t = (PayloadAttribute) target; - t.setPayload((payload == null) ? 
null : (Payload) payload.clone()); - } - - + public void setPayload(Payload payload); } Index: src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java (revision 0) @@ -0,0 +1,101 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.AttributeImpl; + +/** + * The payload of a Token. See also {@link Payload}. + * + *
+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental. + * The APIs introduced in these classes with Lucene 2.9 might change in the future. + * We will make our best efforts to keep the APIs backwards-compatible. + */ +public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable, Serializable { + private Payload payload; + + /** + * Initialize this attribute with no payload. + */ + public PayloadAttributeImpl() {} + + /** + * Initialize this attribute with the given payload. + */ + public PayloadAttributeImpl(Payload payload) { + this.payload = payload; + } + + /** + * Returns this Token's payload. + */ + public Payload getPayload() { + return this.payload; + } + + /** + * Sets this Token's payload. + */ + public void setPayload(Payload payload) { + this.payload = payload; + } + + public void clear() { + payload = null; + } + + public Object clone() { + PayloadAttributeImpl clone = (PayloadAttributeImpl) super.clone(); + if (payload != null) { + clone.payload = (Payload) payload.clone(); + } + return clone; + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof PayloadAttributeImpl) { /* test the concrete class so the cast below cannot fail; other PayloadAttribute implementations (e.g. Token) compare unequal */ + PayloadAttributeImpl o = (PayloadAttributeImpl) other; + if (o.payload == null || payload == null) { + return o.payload == null && payload == null; + } + + return o.payload.equals(payload); + } + + return false; + } + + public int hashCode() { + return (payload == null) ? 0 : payload.hashCode(); + } + + public void copyTo(AttributeImpl target) { + PayloadAttribute t = (PayloadAttribute) target; + t.setPayload((payload == null) ? 
null : (Payload) payload.clone()); + } + + +} Index: src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java (revision 785337) +++ src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java (working copy) @@ -20,7 +20,7 @@ import java.io.Serializable; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; /** The positionIncrement determines the position of this token * relative to the previous Token in a {@link TokenStream}, used in phrase @@ -53,54 +53,15 @@ * * @see org.apache.lucene.index.TermPositions */ -public class PositionIncrementAttribute extends Attribute implements Cloneable, Serializable { - private int positionIncrement = 1; - +public interface PositionIncrementAttribute extends TokenAttribute { /** Set the position increment. The default value is one. * * @param positionIncrement the distance from the prior term */ - public void setPositionIncrement(int positionIncrement) { - if (positionIncrement < 0) - throw new IllegalArgumentException - ("Increment must be zero or greater: " + positionIncrement); - this.positionIncrement = positionIncrement; - } + public void setPositionIncrement(int positionIncrement); /** Returns the position increment of this Token. 
* @see #setPositionIncrement */ - public int getPositionIncrement() { - return positionIncrement; - } - - public void clear() { - this.positionIncrement = 1; - } - - public String toString() { - return "positionIncrement=" + positionIncrement; - } - - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof PositionIncrementAttribute) { - return positionIncrement == ((PositionIncrementAttribute) other).positionIncrement; - } - - return false; - } - - public int hashCode() { - return positionIncrement; - } - - public void copyTo(Attribute target) { - PositionIncrementAttribute t = (PositionIncrementAttribute) target; - t.setPositionIncrement(positionIncrement); - } - + public int getPositionIncrement(); } Index: src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java (revision 0) @@ -0,0 +1,102 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.AttributeImpl; + +/** The positionIncrement determines the position of this token + * relative to the previous Token in a {@link TokenStream}, used in phrase + * searching. + * + *
The default value is one. + * + *
Some common uses for this are:
+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental.
+ * The APIs introduced in these classes with Lucene 2.9 might change in the future.
+ * We will make our best efforts to keep the APIs backwards-compatible.
+ *
+ * @see org.apache.lucene.index.TermPositions
+ */
+public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable, Serializable {
+  /** Current increment; a new or cleared instance holds one. */
+  private int positionIncrement = 1;
+
+  /**
+   * Stores a new position increment. The default value is one.
+   *
+   * @param positionIncrement the distance from the prior term
+   * @throws IllegalArgumentException if positionIncrement is negative
+   */
+  public void setPositionIncrement(int positionIncrement) {
+    if (positionIncrement >= 0) {
+      this.positionIncrement = positionIncrement;
+      return;
+    }
+    throw new IllegalArgumentException("Increment must be zero or greater: " + positionIncrement);
+  }
+
+  /**
+   * Returns the position increment of this Token.
+   * @see #setPositionIncrement
+   */
+  public int getPositionIncrement() {
+    return this.positionIncrement;
+  }
+
+  /** Resets the increment back to its default of one. */
+  public void clear() {
+    positionIncrement = 1;
+  }
+
+  /**
+   * Two instances are equal when both are PositionIncrementAttributeImpl
+   * objects carrying the same increment.
+   */
+  public boolean equals(Object other) {
+    if (!(other instanceof PositionIncrementAttributeImpl)) {
+      return false;
+    }
+    PositionIncrementAttributeImpl that = (PositionIncrementAttributeImpl) other;
+    return that.positionIncrement == positionIncrement;
+  }
+
+  /** Uses the increment itself as the hash value, matching equals. */
+  public int hashCode() {
+    return positionIncrement;
+  }
+
+  /**
+   * Copies this increment into target through its setter.
+   *
+   * @param target must implement PositionIncrementAttribute, otherwise the cast fails
+   */
+  public void copyTo(AttributeImpl target) {
+    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
+  }
+
+}
Index: src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java
===================================================================
--- src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java (revision 785337)
+++ src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java (working copy)
@@ -17,11 +17,7 @@
* limitations under the License.
*/
-import java.io.Serializable;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Attribute;
-
/**
* The term text of a Token.
*
@@ -30,12 +26,7 @@
* The APIs introduced in these classes with Lucene 2.9 might change in the future.
* We will make our best efforts to keep the APIs backwards-compatible.
*/
-public class TermAttribute extends Attribute implements Cloneable, Serializable {
- private static int MIN_BUFFER_SIZE = 10;
-
- private char[] termBuffer;
- private int termLength;
-
+public interface TermAttribute extends TokenAttribute {
/** Returns the Token's term text.
*
* This method has a performance penalty
@@ -45,38 +36,20 @@
* String, use this method, which is nothing more than
* a convenience call to new String(token.termBuffer(), 0, token.termLength())
*/
- public String term() {
- initTermBuffer();
- return new String(termBuffer, 0, termLength);
- }
-
+ public String term();
+
/** Copies the contents of buffer, starting at offset for
* length characters, into the termBuffer array.
* @param buffer the buffer to copy
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
- public void setTermBuffer(char[] buffer, int offset, int length) {
- char[] newCharBuffer = growTermBuffer(length);
- if (newCharBuffer != null) {
- termBuffer = newCharBuffer;
- }
- System.arraycopy(buffer, offset, termBuffer, 0, length);
- termLength = length;
- }
+ public void setTermBuffer(char[] buffer, int offset, int length);
/** Copies the contents of buffer into the termBuffer array.
* @param buffer the buffer to copy
*/
- public void setTermBuffer(String buffer) {
- int length = buffer.length();
- char[] newCharBuffer = growTermBuffer(length);
- if (newCharBuffer != null) {
- termBuffer = newCharBuffer;
- }
- buffer.getChars(0, length, termBuffer, 0);
- termLength = length;
- }
+ public void setTermBuffer(String buffer);
/** Copies the contents of buffer, starting at offset and continuing
* for length characters, into the termBuffer array.
@@ -84,17 +57,8 @@
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
- public void setTermBuffer(String buffer, int offset, int length) {
- assert offset <= buffer.length();
- assert offset + length <= buffer.length();
- char[] newCharBuffer = growTermBuffer(length);
- if (newCharBuffer != null) {
- termBuffer = newCharBuffer;
- }
- buffer.getChars(offset, offset + length, termBuffer, 0);
- termLength = length;
- }
-
+ public void setTermBuffer(String buffer, int offset, int length);
+
/** Returns the internal termBuffer character array which
* you can then directly alter. If the array is too
* small for your token, use {@link
@@ -102,10 +66,7 @@
* altering the buffer be sure to call {@link
* #setTermLength} to record the number of valid
* characters that were placed into the termBuffer. */
- public char[] termBuffer() {
- initTermBuffer();
- return termBuffer;
- }
+ public char[] termBuffer();
/** Grows the termBuffer to at least size newSize, preserving the
* existing content. Note: If the next operation is to change
@@ -117,63 +78,12 @@
* @param newSize minimum size of the new termBuffer
* @return newly created termBuffer with length >= newSize
*/
- public char[] resizeTermBuffer(int newSize) {
- char[] newCharBuffer = growTermBuffer(newSize);
- if (termBuffer == null) {
- // If there were termText, then preserve it.
- // note that if termBuffer is null then newCharBuffer cannot be null
- assert newCharBuffer != null;
- termBuffer = newCharBuffer;
- } else if (newCharBuffer != null) {
- // Note: if newCharBuffer != null then termBuffer needs to grow.
- // If there were a termBuffer, then preserve it
- System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
- termBuffer = newCharBuffer;
- }
- return termBuffer;
- }
+ public char[] resizeTermBuffer(int newSize);
- /** Allocates a buffer char[] of at least newSize
- * @param newSize minimum size of the buffer
- * @return newly created buffer with length >= newSize or null if the current termBuffer is big enough
- */
- private char[] growTermBuffer(int newSize) {
- if (termBuffer != null) {
- if (termBuffer.length >= newSize)
- // Already big enough
- return null;
- else
- // Not big enough; create a new array with slight
- // over allocation:
- return new char[ArrayUtil.getNextSize(newSize)];
- } else {
-
- // determine the best size
- // The buffer is always at least MIN_BUFFER_SIZE
- if (newSize < MIN_BUFFER_SIZE) {
- newSize = MIN_BUFFER_SIZE;
- }
-
- return new char[newSize];
- }
- }
-
- // TODO: once we remove the deprecated termText() method
- // and switch entirely to char[] termBuffer we don't need
- // to use this method anymore
- private void initTermBuffer() {
- if (termBuffer == null) {
- termBuffer = new char[MIN_BUFFER_SIZE];
- termLength = 0;
- }
- }
-
/** Return number of valid characters (length of the term)
* in the termBuffer array. */
- public int termLength() {
- return termLength;
- }
-
+ public int termLength();
+
/** Set number of valid characters (length of the term) in
* the termBuffer array. Use this to truncate the termBuffer
* or to synchronize with external manipulation of the termBuffer.
@@ -181,61 +91,5 @@
* use {@link #resizeTermBuffer(int)} first.
* @param length the truncated length
*/
- public void setTermLength(int length) {
- initTermBuffer();
- if (length > termBuffer.length)
- throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
- termLength = length;
- }
-
- public int hashCode() {
- initTermBuffer();
- int code = termLength;
- code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
- return code;
- }
-
- public void clear() {
- termLength = 0;
- }
-
- public Object clone() {
- TermAttribute t = (TermAttribute)super.clone();
- // Do a deep clone
- if (termBuffer != null) {
- t.termBuffer = (char[]) termBuffer.clone();
- }
- return t;
- }
-
- public boolean equals(Object other) {
- if (other == this) {
- return true;
- }
-
- if (other instanceof TermAttribute) {
- initTermBuffer();
- TermAttribute o = ((TermAttribute) other);
- o.initTermBuffer();
-
- for(int i=0;i
+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental.
+ * The APIs introduced in these classes with Lucene 2.9 might change in the future.
+ * We will make our best efforts to keep the APIs backwards-compatible.
+ */
+public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable, Serializable {
+  private String type;
+
+  /** Lexical type used by the no-arg constructor and restored by {@link #clear()}. */
+  public static final String DEFAULT_TYPE = "word";
+
+  /** Creates an attribute whose type is {@link #DEFAULT_TYPE}. */
+  public TypeAttributeImpl() {
+    this(DEFAULT_TYPE);
+  }
+
+  /** Creates an attribute with the given lexical type. */
+  public TypeAttributeImpl(String type) {
+    this.type = type;
+  }
+
+  /** Returns this Token's lexical type. Defaults to "word". */
+  public String type() {
+    return type;
+  }
+
+  /** Set the lexical type.
+      @see #type() */
+  public void setType(String type) {
+    this.type = type;
+  }
+
+  /** Resets the type to {@link #DEFAULT_TYPE}. */
+  public void clear() {
+    type = DEFAULT_TYPE;
+  }
+
+  /** Two instances are equal when their types are equal; a null type only equals another null type. */
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+
+    if (other instanceof TypeAttributeImpl) {
+      final String otherType = ((TypeAttributeImpl) other).type;
+      // setType(null) is not rejected, so the comparison must not dereference a null type
+      return type == null ? otherType == null : type.equals(otherType);
+    }
+
+    return false;
+  }
+
+  /** Hash of the type string, or 0 when the type is null (consistent with {@link #equals(Object)}). */
+  public int hashCode() {
+    return type == null ? 0 : type.hashCode();
+  }
+
+  /** Copies this attribute's type into the target, which must implement {@link TypeAttribute}. */
+  public void copyTo(AttributeImpl target) {
+    TypeAttribute t = (TypeAttribute) target;
+    // String is immutable: the reference can be shared, and a null type no longer fails in new String(...)
+    t.setType(type);
+  }
+}
Index: src/java/org/apache/lucene/analysis/TokenFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/TokenFilter.java (revision 785337)
+++ src/java/org/apache/lucene/analysis/TokenFilter.java (working copy)
@@ -42,7 +42,7 @@
super(input);
this.input = input;
}
-
+
/** Close the input TokenStream. */
public void close() throws IOException {
input.close();
@@ -53,17 +53,4 @@
super.reset();
input.reset();
}
-
- public boolean useNewAPI() {
- return input.useNewAPI();
- }
-
- /**
- * Sets whether or not to use the new TokenStream API. Settings this
- * will apply to this Filter and all TokenStream/Filters upstream.
- */
- public void setUseNewAPI(boolean use) {
- input.setUseNewAPI(use);
- }
-
}
Index: src/java/org/apache/lucene/analysis/TokenStream.java
===================================================================
--- src/java/org/apache/lucene/analysis/TokenStream.java (revision 785337)
+++ src/java/org/apache/lucene/analysis/TokenStream.java (working copy)
@@ -21,8 +21,9 @@
import java.util.Iterator;
import org.apache.lucene.index.Payload;
-import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.analysis.tokenattributes.*;
/** A TokenStream enumerates the sequence of tokens, either from
fields of a document or from query text.
@@ -36,13 +37,13 @@
A new TokenStream API is introduced with Lucene 2.9. Since
2.9 Token is deprecated and the preferred way to store
- the information of a token is to use {@link Attribute}s.
+ the information of a token is to use {@link AttributeImpl}s.
For that reason TokenStream extends {@link AttributeSource}
- now. Note that only one instance per {@link Attribute} is
+ now. Note that only one instance per {@link AttributeImpl} is
created and reused for every token. This approach reduces
object creations and allows local caching of references to
- the {@link Attribute}s. See {@link #incrementToken()} for further details.
+ the {@link AttributeImpl}s. See {@link #incrementToken()} for further details.
The workflow of the new TokenStream API is as follows:
- * If set to true, the indexer will call {@link #incrementToken()}
- * to consume Tokens from this stream.
- *
- * If set to false, the indexer will call {@link #next(Token)}
- * instead.
- */
- public static void setUseNewAPIDefault(boolean use) {
- useNewAPIDefault = use;
- }
- /**
- * Returns whether or not the new TokenStream APIs are used
- * for this stream.
- * (see {@link #incrementToken()}, {@link AttributeSource}).
- */
- public boolean useNewAPI() {
- return useNewAPI;
+ /**
+ * Sets up the backwards compatibility layer: registers a single {@link Token}
+ * via addAttributeImpl and caches it in localToken.
+ * The two block comments below hold a disabled check that would have run this
+ * setup only when incrementToken() is not overridden; as written, the setup
+ * always runs.
+ *
+ * @throws IllegalArgumentException if any of the six basic attributes (term, type,
+ * position increment, flags, offset, payload) is not backed by a Token instance
+ */
+ protected void initialize() {
+ /*try {
+ // check if this instance hasn't overridden TokenStream.incrementToken()
+ if (TokenStream.class == getClass().getMethod("incrementToken",new Class[0]).getDeclaringClass()) {*/
+ // initialize the backwards compatibility wrapper using a Token instance
+ addAttributeImpl(new Token());
+ if (
+ // check that the basic attributes are all Token instances,
+ // and no one registered another instance before.
+ getAttribute(TermAttribute.class) instanceof Token &&
+ getAttribute(TypeAttribute.class) instanceof Token &&
+ getAttribute(PositionIncrementAttribute.class) instanceof Token &&
+ getAttribute(FlagsAttribute.class) instanceof Token &&
+ getAttribute(OffsetAttribute.class) instanceof Token &&
+ getAttribute(PayloadAttribute.class) instanceof Token
+ ) {
+ // get any attribute (it's always the same, a Token)
+ localToken = (Token) getAttribute(TermAttribute.class);
+ } else {
+ throw new IllegalArgumentException("All basic attributes for TokenStreams must be implemented by Token for backwards compatibility.");
+ }/*
+ } else {
+ localToken = null;
+ }
+ } catch (NoSuchMethodException e) {
+ // should always work
+ throw new RuntimeException(e);
+ }*/
}
-
- /**
- * Use this API to enable or disable the new TokenStream API
- * for this stream. Overrides {@link #setUseNewAPIDefault(boolean)}.
- * (see {@link #incrementToken()}, {@link AttributeSource}).
- *
- * If set to true, the indexer will call {@link #incrementToken()}
- * to consume Tokens from this stream.
- *
- * If set to false, the indexer will call {@link #next(Token)}
- * instead.
- *
- * NOTE: All streams and filters in one chain must use the
- * same API.
- */
- public void setUseNewAPI(boolean use) {
- useNewAPI = use;
- }
/**
* Consumers (e. g. the indexer) use this method to advance the stream
* to the next token. Implementing classes must implement this method
- * and update the appropriate {@link Attribute}s with content of the
+ * and update the appropriate {@link AttributeImpl}s with content of the
* next token.
*
* This method is called for every token of a document, so an efficient
* implementation is crucial for good performance. To avoid calls to
* {@link #addAttribute(Class)} and {@link #getAttribute(Class)} and
- * downcasts, references to all {@link Attribute}s that this stream uses
+ * downcasts, references to all {@link AttributeImpl}s that this stream uses
* should be retrieved during instantiation.
*
* To make sure that filters and consumers know which attributes are available
@@ -164,28 +145,20 @@
* Note that this method will be defined abstract in Lucene 3.0.
*/
public boolean incrementToken() throws IOException {
- // subclasses must implement this method; will be made abstract in Lucene 3.0
- return false;
- }
-
- /** Returns the next token in the stream, or null at EOS.
- * @deprecated The returned Token is a "full private copy" (not
- * re-used across calls to next()) but will be slower
- * than calling {@link #next(Token)} instead.. */
- public Token next() throws IOException {
- final Token reusableToken = new Token();
- Token nextToken = next(reusableToken);
-
- if (nextToken != null) {
+ final Token nextToken = next(localToken);
+ if (nextToken == null) {
+ return false;
+ }
+ if (nextToken != localToken) {
+ localToken.reinit(nextToken);
Payload p = nextToken.getPayload();
if (p != null) {
nextToken.setPayload((Payload) p.clone());
}
}
-
- return nextToken;
- }
-
+ return true;
+ }
+
/** Returns the next token in the stream, or null at EOS.
* When possible, the input Token should be used as the
* returned Token (this gives fastest tokenization
@@ -215,14 +188,31 @@
* good idea to assert that it is not null.)
* @return next token in the stream or null if end-of-stream was hit
* @deprecated The new {@link #incrementToken()} and {@link AttributeSource}
- * APIs should be used instead. See also {@link #useNewAPI()}.
+ * APIs should be used instead.
*/
public Token next(final Token reusableToken) throws IOException {
- // We don't actually use inputToken, but still add this assert
+ // We don't actually use reusableToken, but still add this assert
assert reusableToken != null;
return next();
}
+  /** Returns the next token in the stream, or null at EOS.
+   * @deprecated The returned Token is a "full private copy" (not
+   * re-used across calls to next()) but will be slower
+   * than calling {@link #next(Token)} instead. */
+  public Token next() throws IOException {
+    final Token copy = new Token();
+    if (!incrementToken()) {
+      // end of stream
+      return null;
+    }
+    copy.reinit(localToken);
+    final Payload payload = copy.getPayload();
+    if (payload != null) {
+      // hand the caller its own Payload instance
+      copy.setPayload((Payload) payload.clone());
+    }
+    return copy;
+  }
+
/** Resets this stream to the beginning. This is an
* optional operation, so subclasses may or may not
* implement this method. Reset() is not needed for
@@ -259,5 +249,5 @@
sb.append(')');
return sb.toString();
}
-
+
}
Index: src/java/org/apache/lucene/index/DocInverterPerField.java
===================================================================
--- src/java/org/apache/lucene/index/DocInverterPerField.java (revision 785337)
+++ src/java/org/apache/lucene/index/DocInverterPerField.java (working copy)
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -83,7 +82,6 @@
final int valueLength = stringValue.length();
perThread.singleTokenTokenStream.reinit(stringValue, 0, valueLength);
fieldState.attributeSource = perThread.singleTokenTokenStream;
- perThread.localTokenStream.reset();
consumer.start(field);
boolean success = false;
@@ -132,21 +130,15 @@
try {
int offsetEnd = fieldState.offset-1;
- boolean useNewTokenStreamAPI = stream.useNewAPI();
- Token localToken = null;
-
- if (useNewTokenStreamAPI) {
- fieldState.attributeSource = stream;
- } else {
- fieldState.attributeSource = perThread.localTokenStream;
- localToken = perThread.localToken;
- }
-
- consumer.start(field);
+ boolean hasMoreTokens = stream.incrementToken();
+ fieldState.attributeSource = stream;
+
OffsetAttribute offsetAttribute = (OffsetAttribute) fieldState.attributeSource.addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
+ consumer.start(field);
+
for(;;) {
// If we hit an exception in stream.next below
@@ -155,15 +147,9 @@
// non-aborting and (above) this one document
// will be marked as deleted, but still
// consume a docID
- Token token = null;
- if (useNewTokenStreamAPI) {
- if (!stream.incrementToken()) break;
- } else {
- token = stream.next(localToken);
- if (token == null) break;
- perThread.localTokenStream.set(token);
- }
+ if (!hasMoreTokens) break;
+
final int posIncr = posIncrAttribute.getPositionIncrement();
fieldState.position += posIncr;
if (allowMinus1Position || fieldState.position > 0) {
@@ -194,6 +180,8 @@
docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
break;
}
+
+ hasMoreTokens = stream.incrementToken();
}
fieldState.offset = offsetEnd+1;
} finally {
Index: src/java/org/apache/lucene/index/DocInverterPerThread.java
===================================================================
--- src/java/org/apache/lucene/index/DocInverterPerThread.java (revision 785337)
+++ src/java/org/apache/lucene/index/DocInverterPerThread.java (working copy)
@@ -19,15 +19,9 @@
import java.io.IOException;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.Attribute;
/** This is a DocFieldConsumer that inverts each field,
* separately, from a Document, and accepts a
@@ -37,10 +31,8 @@
final DocInverter docInverter;
final InvertedDocConsumerPerThread consumer;
final InvertedDocEndConsumerPerThread endConsumer;
- final Token localToken = new Token();
//TODO: change to SingleTokenTokenStream after Token was removed
final SingleTokenTokenStream singleTokenTokenStream = new SingleTokenTokenStream();
- final BackwardsCompatibilityStream localTokenStream = new BackwardsCompatibilityStream();
static class SingleTokenTokenStream extends TokenStream {
TermAttribute termAttribute;
@@ -57,74 +49,6 @@
}
}
- /** This stream wrapper is only used to maintain backwards compatibility with the
- * old TokenStream API and can be removed in Lucene 3.0
- * @deprecated
- */
- static class BackwardsCompatibilityStream extends TokenStream {
- private Token token;
-
- TermAttribute termAttribute = new TermAttribute() {
- public String term() {
- return token.term();
- }
-
- public char[] termBuffer() {
- return token.termBuffer();
- }
-
- public int termLength() {
- return token.termLength();
- }
- };
- OffsetAttribute offsetAttribute = new OffsetAttribute() {
- public int startOffset() {
- return token.startOffset();
- }
-
- public int endOffset() {
- return token.endOffset();
- }
- };
-
- PositionIncrementAttribute positionIncrementAttribute = new PositionIncrementAttribute() {
- public int getPositionIncrement() {
- return token.getPositionIncrement();
- }
- };
-
- FlagsAttribute flagsAttribute = new FlagsAttribute() {
- public int getFlags() {
- return token.getFlags();
- }
- };
-
- PayloadAttribute payloadAttribute = new PayloadAttribute() {
- public Payload getPayload() {
- return token.getPayload();
- }
- };
-
- TypeAttribute typeAttribute = new TypeAttribute() {
- public String type() {
- return token.type();
- }
- };
-
- BackwardsCompatibilityStream() {
- attributes.put(TermAttribute.class, termAttribute);
- attributes.put(OffsetAttribute.class, offsetAttribute);
- attributes.put(PositionIncrementAttribute.class, positionIncrementAttribute);
- attributes.put(FlagsAttribute.class, flagsAttribute);
- attributes.put(PayloadAttribute.class, payloadAttribute);
- attributes.put(TypeAttribute.class, typeAttribute);
- }
-
- public void set(Token token) {
- this.token = token;
- }
- };
-
final DocumentsWriter.DocState docState;
final FieldInvertState fieldState = new FieldInvertState();
Index: src/java/org/apache/lucene/queryParser/QueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 785337)
+++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy)
@@ -526,67 +526,42 @@
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
- org.apache.lucene.analysis.Token reusableToken = null;
- org.apache.lucene.analysis.Token nextToken = null;
-
-
- boolean useNewAPI = TokenStream.useNewAPIDefault();
-
- if (useNewAPI) {
- boolean success = false;
- try {
- buffer.reset();
- success = true;
- } catch (IOException e) {
- // success==false if we hit an exception
+ boolean success = false;
+ try {
+ buffer.reset();
+ success = true;
+ } catch (IOException e) {
+ // success==false if we hit an exception
+ }
+ if (success) {
+ if (buffer.hasAttribute(TermAttribute.class)) {
+ termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class);
}
- if (success) {
- if (buffer.hasAttribute(TermAttribute.class)) {
- termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class);
- }
- if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
- posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class);
- }
+ if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
+ posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class);
}
- } else {
- reusableToken = new org.apache.lucene.analysis.Token();
}
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
- if (useNewAPI) {
- if (termAtt != null) {
- try {
- while (buffer.incrementToken()) {
- numTokens++;
- int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
- if (positionIncrement != 0) {
- positionCount += positionIncrement;
- } else {
- severalTokensAtSamePosition = true;
- }
+ boolean hasMoreTokens = false;
+ if (termAtt != null) {
+ try {
+ hasMoreTokens = buffer.incrementToken();
+ while (hasMoreTokens) {
+ numTokens++;
+ int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
+ if (positionIncrement != 0) {
+ positionCount += positionIncrement;
+ } else {
+ severalTokensAtSamePosition = true;
}
- } catch (IOException e) {
- // ignore
+ hasMoreTokens = buffer.incrementToken();
}
+ } catch (IOException e) {
+ // ignore
}
- } else {
- while (true) {
- try {
- nextToken = buffer.next(reusableToken);
- }
- catch (IOException e) {
- nextToken = null;
- }
- if (nextToken == null)
- break;
- numTokens++;
- if (nextToken.getPositionIncrement() != 0)
- positionCount += nextToken.getPositionIncrement();
- else
- severalTokensAtSamePosition = true;
- }
}
try {
// rewind the buffer stream
@@ -604,16 +579,9 @@
else if (numTokens == 1) {
String term = null;
try {
-
- if (useNewAPI) {
- boolean hasNext = buffer.incrementToken();
- assert hasNext == true;
- term = termAtt.term();
- } else {
- nextToken = buffer.next(reusableToken);
- assert nextToken != null;
- term = nextToken.term();
- }
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ term = termAtt.term();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@@ -626,15 +594,9 @@
for (int i = 0; i < numTokens; i++) {
String term = null;
try {
- if (useNewAPI) {
- boolean hasNext = buffer.incrementToken();
- assert hasNext == true;
- term = termAtt.term();
- } else {
- nextToken = buffer.next(reusableToken);
- assert nextToken != null;
- term = nextToken.term();
- }
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ term = termAtt.term();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@@ -655,18 +617,11 @@
String term = null;
int positionIncrement = 1;
try {
- if (useNewAPI) {
- boolean hasNext = buffer.incrementToken();
- assert hasNext == true;
- term = termAtt.term();
- if (posIncrAtt != null) {
- positionIncrement = posIncrAtt.getPositionIncrement();
- }
- } else {
- nextToken = buffer.next(reusableToken);
- assert nextToken != null;
- term = nextToken.term();
- positionIncrement = nextToken.getPositionIncrement();
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ term = termAtt.term();
+ if (posIncrAtt != null) {
+ positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
@@ -702,19 +657,11 @@
int positionIncrement = 1;
try {
- if (useNewAPI) {
-
- boolean hasNext = buffer.incrementToken();
- assert hasNext == true;
- term = termAtt.term();
- if (posIncrAtt != null) {
- positionIncrement = posIncrAtt.getPositionIncrement();
- }
- } else {
- nextToken = buffer.next(reusableToken);
- assert nextToken != null;
- term = nextToken.term();
- positionIncrement = nextToken.getPositionIncrement();
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ term = termAtt.term();
+ if (posIncrAtt != null) {
+ positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
Index: src/java/org/apache/lucene/search/QueryTermVector.java
===================================================================
--- src/java/org/apache/lucene/search/QueryTermVector.java (revision 785337)
+++ src/java/org/apache/lucene/search/QueryTermVector.java (working copy)
@@ -27,7 +27,6 @@
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.TermFreqVector;
@@ -59,17 +58,15 @@
{
List terms = new ArrayList();
try {
- if (stream.useNewAPI()) {
- stream.reset();
- TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
- while (stream.incrementToken()) {
- terms.add(termAtt.term());
- }
- } else {
- final Token reusableToken = new Token();
- for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
- terms.add(nextToken.term());
- }
+ boolean hasMoreTokens = false;
+
+ stream.reset();
+ TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
+
+ hasMoreTokens = stream.incrementToken();
+ while (hasMoreTokens) {
+ terms.add(termAtt.term());
+ hasMoreTokens = stream.incrementToken();
}
processTerms((String[])terms.toArray(new String[terms.size()]));
} catch (IOException e) {
Index: src/java/org/apache/lucene/util/Attribute.java
===================================================================
--- src/java/org/apache/lucene/util/Attribute.java (revision 785340)
+++ src/java/org/apache/lucene/util/Attribute.java (working copy)
@@ -1,95 +1,5 @@
package org.apache.lucene.util;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+public interface Attribute {
-import java.io.Serializable;
-
-/**
- * Base class for Attributes that can be added to a
- * {@link org.apache.lucene.util.AttributeSource}.
- *
- * Attributes are used to add data in a dynamic, yet type-safe way to a source
- * of usually streamed objects, e. g. a {@link org.apache.lucene.analysis.TokenStream}.
- *
- * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental.
- * The APIs introduced in these classes with Lucene 2.9 might change in the future.
- * We will make our best efforts to keep the APIs backwards-compatible.
- */
-public abstract class Attribute implements Cloneable, Serializable {
- /**
- * Clears the values in this Attribute and resets it to its
- * default value.
- */
- public abstract void clear();
-
- /**
- * Subclasses must implement this method and should follow a syntax
- * similar to this one:
- *
- *
+ * Attributes are used to add data in a dynamic, yet type-safe way to a source
+ * of usually streamed objects, e. g. a {@link org.apache.lucene.analysis.TokenStream}.
+ *
+ * WARNING: The status of the new TokenStream, AttributeSource and Attributes is experimental.
+ * The APIs introduced in these classes with Lucene 2.9 might change in the future.
+ * We will make our best efforts to keep the APIs backwards-compatible.
+ */
+public abstract class AttributeImpl implements Cloneable, Serializable {
+ /**
+ * Clears the values in this Attribute and resets it to its
+ * default value.
+ */
+ public abstract void clear();
+
+ /**
+ * The default implementation of this method accesses all declared
+ * fields of this object and prints the values in the following syntax:
+ *
+ *
+ * Note that this method does not affect attributes of the targetStream
+ * that are not contained in this state. In other words, if for example
+ * the targetStream contains an OffsetAttribute, but this state doesn't, then
+ * the value of the OffsetAttribute remains unchanged. It might be desirable to
+ * reset its value to the default, in which case the caller should first
+ * call {@link TokenStream#clearAttributes()} on the targetStream.
+ */
+  public void restoreState(State state) {
+    if (currentState == null) {
+      // no cached chain for this source yet; computeCurrentState() is expected to populate currentState
+      computeCurrentState();
+    }
+
+    State source = state;
+    State target = currentState;
+
+    // Walk both chains in lock-step, copying each captured value into the
+    // attribute at the same position of this source.
+    do {
+      // Compare the classes of the wrapped attributes: source and target are
+      // both State nodes, so comparing the node classes verified nothing.
+      assert source != null && target != null
+          && source.attribute.getClass() == target.attribute.getClass()
+          : "state does not match the attributes of this AttributeSource";
+      source.attribute.copyTo(target.attribute);
+      source = source.next;
+      target = target.next;
+    } while (source != null);
+  }
+
+
+ /**
* An AttributeAcceptor defines only a single method {@link #accept(Class)}.
* It can be used for e. g. buffering purposes to specify which attributes
* to buffer.
@@ -56,13 +153,20 @@
public boolean accept(Class attClass) {return true;}
};
+ private AttributeFactory factory = AttributeFactory.DefaultAttributeFactory;
+
/**
* Holds the Class<Attribute> -> Attribute mapping
*/
protected Map attributes;
+ /**
+ * Hook invoked as the last statement of both AttributeSource constructors.
+ * This default implementation does nothing. Because it runs from a
+ * constructor, an override executes before the overriding class's own
+ * field initializers and constructor body.
+ */
+ protected void initialize() {
+
+ }
+
public AttributeSource() {
this.attributes = new LinkedHashMap();
+ initialize();
}
public AttributeSource(AttributeSource input) {
@@ -70,8 +174,17 @@
throw new IllegalArgumentException("input AttributeSource must not be null");
}
this.attributes = input.attributes;
+ initialize();
}
+  /**
+   * Replaces the factory that {@link #addAttribute(Class)} asks for a new
+   * instance when the requested attribute is not registered yet.
+   */
+  public void setAttributeFactory(AttributeFactory factory) {
+    this.factory = factory;
+  }
+
+  /** Returns the factory currently used to create attribute instances. */
+  public AttributeFactory getAttributeFactory() {
+    return factory;
+  }
+
/** Returns an iterator that iterates the attributes
* in the same order they were added in.
*/
@@ -79,24 +192,39 @@
return attributes.values().iterator();
}
+  /**
+   * Registers the given instance under every interface it implements that is
+   * assignable to {@link Attribute}, including interfaces declared on its
+   * superclasses. An interface that already has an instance registered is
+   * left untouched.
+   */
+  public void addAttributeImpl(AttributeImpl att) {
+    // find all interfaces that this attribute instance implements
+    // and that extend the Attribute interface
+    Class clazz = att.getClass();
+    do {
+      // Class.getInterfaces() only reports interfaces declared directly on that
+      // class, so query the class currently being visited, not att.getClass().
+      Class[] interfaces = clazz.getInterfaces();
+      for (int i = 0; i < interfaces.length; i++) {
+        Class curInterface = interfaces[i];
+        if (Attribute.class.isAssignableFrom(curInterface)) {
+          // Attribute is a superinterface of this interface
+          if (!attributes.containsKey(curInterface)) {
+            // invalidate state to force recomputation in captureState()
+            this.currentState = null;
+            attributes.put(curInterface, att);
+          }
+        }
+      }
+
+      clazz = clazz.getSuperclass();
+    } while (clazz != null);
+  }
+
/**
* The caller must pass in a Class<? extends Attribute> value.
* This method first checks if an instance of that class is
* already in this AttributeSource and returns it. Otherwise a
* new instance is created, added to this AttributeSource and returned.
*/
- public Attribute addAttribute(Class attClass) {
- Attribute att = (Attribute) attributes.get(attClass);
+ public AttributeImpl addAttribute(Class attClass) {
+ AttributeImpl att = (AttributeImpl) attributes.get(attClass);
if (att == null) {
- try {
- att = (Attribute) attClass.newInstance();
- } catch (InstantiationException e) {
- throw new IllegalArgumentException("Could not instantiate class " + attClass);
- } catch (IllegalAccessException e) {
- throw new IllegalArgumentException("Could not instantiate class " + attClass);
- }
-
- attributes.put(attClass, att);
+ att = this.factory.createAttributeInstance(attClass);
+ addAttributeImpl(att);
}
return att;
}
@@ -121,8 +249,8 @@
* @throws IllegalArgumentException if this AttributeSource does not contain the
* Attribute
*/
- public Attribute getAttribute(Class attClass) {
- Attribute att = (Attribute) this.attributes.get(attClass);
+ public AttributeImpl getAttribute(Class attClass) {
+ AttributeImpl att = (AttributeImpl) this.attributes.get(attClass);
if (att == null) {
throw new IllegalArgumentException("This token does not have the attribute '" + attClass + "'.");
}
@@ -132,12 +260,12 @@
/**
* Resets all Attributes in this AttributeSource by calling
- * {@link Attribute#clear()} on each Attribute.
+ * {@link AttributeImpl#clear()} on each Attribute.
*/
public void clearAttributes() {
- Iterator it = getAttributesIterator();
+ Iterator it = this.attributes.values().iterator();
while (it.hasNext()) {
- ((Attribute) it.next()).clear();
+ ((AttributeImpl) it.next()).clear();
}
}
@@ -145,27 +273,16 @@
* Captures the current state of the passed in TokenStream.
*
* This state will contain all of the passed in TokenStream's
- * {@link Attribute}s. If only a subset of the attributes is needed
- * please use {@link #captureState(AttributeAcceptor)}
+ * {@link AttributeImpl}s which the {@link AttributeAcceptor} accepts.
*/
- public AttributeSource captureState() {
- return captureState(AllAcceptor);
- }
-
- /**
- * Captures the current state of the passed in TokenStream.
- *
- * This state will contain all of the passed in TokenStream's
- * {@link Attribute}s which the {@link AttributeAcceptor} accepts.
- */
public AttributeSource captureState(AttributeAcceptor acceptor) {
AttributeSource state = new AttributeSource();
Iterator it = getAttributesIterator();
while(it.hasNext()) {
- Attribute att = (Attribute) it.next();
+ AttributeImpl att = (AttributeImpl) it.next();
if (acceptor.accept(att.getClass())) {
- Attribute clone = (Attribute) att.clone();
+ AttributeImpl clone = (AttributeImpl) att.clone();
state.attributes.put(att.getClass(), clone);
}
}
@@ -173,28 +290,6 @@
return state;
}
- /**
- * Restores this state by copying the values of all attributes
- * that this state contains into the attributes of the targetStream.
- * The targetStream must contain a corresponding instance for each argument
- * contained in this state.
- *
- * Note that this method does not affect attributes of the targetStream
- * that are not contained in this state. In other words, if for example
- * the targetStream contains an OffsetAttribute, but this state doesn't, then
- * the value of the OffsetAttribute remains unchanged. It might be desirable to
- * reset its value to the default, in which case the caller should first
- * call {@link TokenStream#clearAttributes()} on the targetStream.
- */
- public void restoreState(AttributeSource target) {
- Iterator it = getAttributesIterator();
- while (it.hasNext()) {
- Attribute att = (Attribute) it.next();
- Attribute targetAtt = target.getAttribute(att.getClass());
- att.copyTo(targetAtt);
- }
- }
-
public int hashCode() {
int code = 0;
if (hasAttributes()) {
@@ -228,7 +323,7 @@
while (it.hasNext()) {
Class attName = it.next().getClass();
- Attribute otherAtt = (Attribute) other.attributes.get(attName);
+ AttributeImpl otherAtt = (AttributeImpl) other.attributes.get(attName);
if (otherAtt == null || !otherAtt.equals(attributes.get(attName))) {
return false;
}
Index: src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 785337)
+++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy)
@@ -141,11 +141,11 @@
public TokenStream tokenStream(String fieldName, Reader reader) {
return new TokenFilter(new WhitespaceTokenizer(reader)) {
boolean first=true;
- AttributeSource state;
+ AttributeSource.State state;
public boolean incrementToken() throws IOException {
if (state != null) {
- state.restoreState(this);
+ restoreState(state);
payloadAtt.setPayload(null);
posIncrAtt.setPositionIncrement(0);
termAtt.setTermBuffer(new char[]{'b'}, 0, 1);
Index: src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 785337)
+++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -3540,14 +3540,14 @@
tokens.addAttribute(TermAttribute.class);
tokens.addAttribute(PositionIncrementAttribute.class);
- AttributeSource state = new AttributeSource();
+ TokenStream state = new TokenStream(){};
TermAttribute termAtt = (TermAttribute) state.addAttribute(TermAttribute.class);
PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class);
termAtt.setTermBuffer("a");
posIncrAtt.setPositionIncrement(0);
tokens.add(state);
- state = new AttributeSource();
+ state = new TokenStream(){};
termAtt = (TermAttribute) state.addAttribute(TermAttribute.class);
posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class);
@@ -3555,7 +3555,7 @@
posIncrAtt.setPositionIncrement(1);
tokens.add(state);
- state = new AttributeSource();
+ state = new TokenStream(){};
termAtt = (TermAttribute) state.addAttribute(TermAttribute.class);
posIncrAtt = (PositionIncrementAttribute) state.addAttribute(PositionIncrementAttribute.class);
Index: src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java
===================================================================
--- src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (revision 785337)
+++ src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java (working copy)
@@ -320,6 +320,10 @@
public Token next(final Token reusableToken) {
return null;
}
+
+ public boolean incrementToken() {
+ return false;
+ }
}
}
Index: src/test/org/apache/lucene/util/LuceneTestCase.java
===================================================================
--- src/test/org/apache/lucene/util/LuceneTestCase.java (revision 785337)
+++ src/test/org/apache/lucene/util/LuceneTestCase.java (working copy)
@@ -44,7 +44,6 @@
protected void setUp() throws Exception {
ConcurrentMergeScheduler.setTestMode();
- TokenStream.setUseNewAPIDefault(true);
}
protected void tearDown() throws Exception {
@@ -80,9 +81,10 @@
*/
public abstract class TokenStream extends AttributeSource {
- private static boolean useNewAPIDefault = false;
- private boolean useNewAPI = useNewAPIDefault;
-
+
+ /** @deprecated */
+ private Token localToken;
+
protected TokenStream() {
super();
}
@@ -90,68 +92,47 @@
protected TokenStream(AttributeSource input) {
super(input);
}
-
- /**
- * Returns whether or not the new TokenStream APIs are used
- * by default.
- * (see {@link #incrementToken()}, {@link AttributeSource}).
- */
- public static boolean useNewAPIDefault() {
- return useNewAPIDefault;
- }
-
- /**
- * Use this API to enable or disable the new TokenStream API.
- * by default. Can be overridden by calling {@link #setUseNewAPI(boolean)}.
- * (see {@link #incrementToken()}, {@link AttributeSource}).
- *
- * public String toString() {
- * return "start=" + startOffset + ",end=" + endOffset;
- * }
- *
- */
- public abstract String toString();
-
- /**
- * Subclasses must implement this method and should compute
- * a hashCode similar to this:
- *
- * public int hashCode() {
- * int code = startOffset;
- * code = code * 31 + endOffset;
- * return code;
- * }
- *
- *
- * see also {@link #equals(Object)}
- */
- public abstract int hashCode();
-
- /**
- * All values used for computation of {@link #hashCode()}
- * should be checked here for equality.
- *
- * see also {@link Object#equals(Object)}
- */
- public abstract boolean equals(Object other);
-
- /**
- * Copies the values from this Attribute into the passed-in
- * target attribute. The type of the target must match the type
- * of this attribute.
- */
- public abstract void copyTo(Attribute target);
-
- /**
- * Shallow clone. Subclasses must override this if they
- * need to clone any members deeply,
- */
- public Object clone() {
- Object clone = null;
- try {
- clone = super.clone();
- } catch (CloneNotSupportedException e) {
- throw new RuntimeException(e); // shouldn't happen
- }
- return clone;
- }
}
Index: src/java/org/apache/lucene/util/AttributeImpl.java
===================================================================
--- src/java/org/apache/lucene/util/AttributeImpl.java (revision 0)
+++ src/java/org/apache/lucene/util/AttributeImpl.java (revision 0)
@@ -0,0 +1,123 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Serializable;
+import java.lang.reflect.Field;
+
+/**
+ * Base class for Attributes that can be added to a
+ * {@link org.apache.lucene.util.AttributeSource}.
+ *
+ * public String toString() {
+ * return "start=" + startOffset + ",end=" + endOffset;
+ * }
+ *
+ *
+ * This method may be overridden by subclasses.
+ */
+ public String toString() {
+ StringBuffer buffer = new StringBuffer();
+ Class clazz = this.getClass();
+ Field[] fields = clazz.getDeclaredFields();
+ try {
+ for (int i = 0; i < fields.length; i++) {
+ Field f = fields[i];
+ f.setAccessible(true);
+ Object value = f.get(this);
+ if (value == null) {
+ buffer.append(f.getName() + "=null");
+ } else {
+ buffer.append(f.getName() + "=" + value);
+ }
+ if (i < fields.length - 1) {
+ buffer.append(',');
+ }
+ }
+ } catch (IllegalAccessException e) {
+ // this should never happen, because we're just accessing fields
+ // from 'this'
+ throw new RuntimeException(e);
+ }
+
+ return buffer.toString();
+ }
+
+ /**
+ * Subclasses must implement this method and should compute
+ * a hashCode similar to this:
+ *
+ * public int hashCode() {
+ * int code = startOffset;
+ * code = code * 31 + endOffset;
+ * return code;
+ * }
+ *
+ *
+ * see also {@link #equals(Object)}
+ */
+ public abstract int hashCode();
+
+ /**
+ * All values used for computation of {@link #hashCode()}
+ * should be checked here for equality.
+ *
+ * see also {@link Object#equals(Object)}
+ */
+ public abstract boolean equals(Object other);
+
+ /**
+ * Copies the values from this Attribute into the passed-in
+ * target attribute. The type of the target must match the type
+ * of this attribute.
+ */
+ public abstract void copyTo(AttributeImpl target);
+
+ /**
+ * Shallow clone. Subclasses must override this if they
+ * need to clone any members deeply.
+ */
+ public Object clone() {
+ Object clone = null;
+ try {
+ clone = super.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException(e); // shouldn't happen
+ }
+ return clone;
+ }
+}
Index: src/java/org/apache/lucene/util/AttributeSource.java
===================================================================
--- src/java/org/apache/lucene/util/AttributeSource.java (revision 785340)
+++ src/java/org/apache/lucene/util/AttributeSource.java (working copy)
@@ -19,13 +19,14 @@
import java.util.Iterator;
import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
import java.util.Map;
+import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
-
/**
- * An AttributeSource contains a list of different {@link Attribute}s,
+ * An AttributeSource contains a list of different {@link AttributeImpl}s,
* and methods to add and get them. There can only be a single instance
* of an attribute in the same AttributeSource instance. This is ensured
* by passing in the actual type of the Attribute (Class<Attribute>) to
@@ -39,7 +40,103 @@
* We will make our best efforts to keep the APIs backwards-compatible.
*/
public class AttributeSource {
+ public static abstract class AttributeFactory {
+ public abstract AttributeImpl createAttributeInstance(Class attClass);
+
+ public static final AttributeFactory DefaultAttributeFactory = new AttributeFactory() {
+ public AttributeImpl createAttributeInstance(Class attClass) {
+ try {
+ return (AttributeImpl) resolveClassName(attClass.getName()).newInstance();
+ } catch (InstantiationException e) {
+ throw new IllegalArgumentException("Could not instantiate class " + attClass);
+ } catch (IllegalAccessException e) {
+ throw new IllegalArgumentException("Could not instantiate class " + attClass);
+ }
+ }
+
+ protected Class resolveClassName(String attClassName) {
+ try {
+ attClassName += "Impl";
+ return Class.forName(attClassName);
+ } catch (ClassNotFoundException e) {
+ throw new IllegalArgumentException("Could not find implementing class " + attClassName);
+ }
+ }
+ };
+ }
+
+ public static final class State implements Cloneable {
+ private AttributeImpl attribute;
+ private State next;
+
+ public Object clone() {
+ State clone = new State();
+ clone.attribute = (AttributeImpl) attribute.clone();
+
+ if (next != null) {
+ clone.next = (State) next.clone();
+ }
+
+ return clone;
+ }
+ }
+
+ private State currentState;
+
+ private void computeCurrentState() {
+ // find all unique AttributeImpls
+ Set impls = new LinkedHashSet();
+ impls.addAll(attributes.values());
+ Iterator it = impls.iterator();
+ currentState = new State();
+
+ State c = currentState;
+ c.attribute = (AttributeImpl) it.next();
+ while (it.hasNext()) {
+ c.next = new State();
+ c = c.next;
+ c.attribute = (AttributeImpl) it.next();
+ }
+ }
+
+ public State captureState() {
+ if (currentState == null) {
+ computeCurrentState();
+ }
+ return (State) this.currentState.clone();
+ }
+
/**
+ * Restores this state by copying the values of all attributes
+ * that this state contains into the attributes of the targetStream.
+ * The targetStream must contain a corresponding instance for each attribute
+ * contained in this state.
+ *