Index: backwards/src/test/org/apache/lucene/analysis/TestToken.java
===================================================================
--- backwards/src/test/org/apache/lucene/analysis/TestToken.java	(revision 932329)
+++ backwards/src/test/org/apache/lucene/analysis/TestToken.java	(working copy)
@@ -31,6 +31,7 @@
     super(name);
   }
 
+  /* toString changed in 3.1:
   public void testCtor() throws Exception {
     Token t = new Token();
     char[] content = "hello".toCharArray();
@@ -60,6 +61,7 @@
     assertEquals("(hello,6,22,type=junk)", t.toString());
     assertEquals(0, t.getFlags());
   }
+  */
 
   public void testResize() {
     Token t = new Token();
@@ -139,6 +141,7 @@
     assertEquals(20000, t.termLength());
   }
 
+  /* toString changed in 3.1:
   public void testToString() throws Exception {
     char[] b = {'a', 'l', 'o', 'h', 'a'};
     Token t = new Token("", 0, 5);
@@ -148,6 +151,7 @@
     t.setTermBuffer("hi there");
     assertEquals("(hi there,0,5)", t.toString());
   }
+  */
 
   public void testTermBufferEquals() throws Exception {
     Token t1a = new Token();
Index: CHANGES.txt
===================================================================
--- CHANGES.txt	(revision 932329)
+++ CHANGES.txt	(working copy)
@@ -94,6 +94,13 @@
   FSDirectory.FSIndexInput. Anyone extending this class will have to
   fix their code on upgrading. (Earwin Burrfoot via Mike McCandless)
 
+* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
+  now implements CharSequence. This requires the toString() methods of
+  CharTermAttribute, deprecated TermAttribute, and Token to return only
+  the term text and no other attribute contents.
+  TODO: Point to new attribute inspection API coming with LUCENE-2374.
+  (Uwe Schindler, Robert Muir)
+
 Changes in runtime behavior
 
 * LUCENE-1923: Made IndexReader.toString() produce something
@@ -186,6 +193,17 @@
   deleted docs (getDeletedDocs), providing a new Bits interface to
   directly query by doc ID.
 
+* LUCENE-2302: Deprecated TermAttribute and replaced by a new
+  CharTermAttribute. The change is backwards compatible, so
+  mixed new/old TokenStreams all work on the same char[] buffer
+  independent of which interface they use. CharTermAttribute
+  has shorter method names and implements CharSequence and
+  Appendable. This allows usage like Java's StringBuilder in
+  addition to direct char[] access. Also terms can directly be
+  used in places where CharSequence is allowed (e.g. regular
+  expressions).
+  (Uwe Schindler, Robert Muir)
+
 Bug fixes
 
 * LUCENE-2119: Don't throw NegativeArraySizeException if you pass
Index: src/java/org/apache/lucene/analysis/Token.java
===================================================================
--- src/java/org/apache/lucene/analysis/Token.java	(revision 932329)
+++ src/java/org/apache/lucene/analysis/Token.java	(working copy)
@@ -112,10 +112,14 @@
   <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
   </ul>
-  </p>
+  <p>
+  Please note: With Lucene 3.1, the {@linkplain #toString toString()} method had to be changed to match the
+  {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
+  This method now only prints the term text, no additional information anymore.
+  </p>
   @see org.apache.lucene.index.Payload
 */
-// TODO: change superclass to CharTermAttribute in 4.0!
+// TODO: change superclass to CharTermAttribute in 4.0! Maybe deprecate the whole class?
 public class Token extends TermAttributeImpl 
                    implements TypeAttribute, PositionIncrementAttribute,
                               FlagsAttribute, OffsetAttribute, PayloadAttribute {
@@ -349,19 +353,6 @@
     this.payload = payload;
   }
 
-  @Override
-  public String toString() {
-    final StringBuilder sb = new StringBuilder();
-    sb.append('(').append(super.toString()).append(',')
-      .append(startOffset).append(',').append(endOffset);
-    if (!"word".equals(type))
-      sb.append(",type=").append(type);
-    if (positionIncrement != 1)
-      sb.append(",posIncr=").append(positionIncrement);
-    sb.append(')');
-    return sb.toString();
-  }
-
   /** Resets the term text, payload, flags, and positionIncrement,
    *  startOffset, endOffset and token type to default.
    */
Index: src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
===================================================================
--- src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java	(revision 932329)
+++ src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java	(working copy)
@@ -20,6 +20,7 @@
 import java.io.Serializable;
 import java.nio.CharBuffer;
 
+import org.apache.lucene.analysis.Token; // for javadocs
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
@@ -224,6 +225,14 @@
     return false;
   }
 
+  /**
+   * Returns solely the term text as specified by the
+   * {@link CharSequence} interface.
+   * <p>This method changed the behavior with Lucene 3.1,
+   * before it returned a String representation of the whole
+   * term with all attributes.
+   * This affects especially the {@link Token} subclass.
+   */
   @Override
   public String toString() {
     return new String(termBuffer, 0, termLength);
Index: src/test/org/apache/lucene/analysis/TestToken.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestToken.java	(revision 932329)
+++ src/test/org/apache/lucene/analysis/TestToken.java	(working copy)
@@ -36,6 +36,8 @@
     char[] content = "hello".toCharArray();
     t.setTermBuffer(content, 0, content.length);
     assertNotSame(t.termBuffer(), content);
+    assertEquals(0, t.startOffset());
+    assertEquals(0, t.endOffset());
     assertEquals("hello", t.term());
     assertEquals("word", t.type());
     assertEquals(0, t.getFlags());
@@ -43,20 +45,28 @@
     t = new Token(6, 22);
     t.setTermBuffer(content, 0, content.length);
     assertEquals("hello", t.term());
-    assertEquals("(hello,6,22)", t.toString());
+    assertEquals("hello", t.toString());
+    assertEquals(6, t.startOffset());
+    assertEquals(22, t.endOffset());
     assertEquals("word", t.type());
     assertEquals(0, t.getFlags());
 
     t = new Token(6, 22, 7);
     t.setTermBuffer(content, 0, content.length);
     assertEquals("hello", t.term());
-    assertEquals("(hello,6,22)", t.toString());
+    assertEquals("hello", t.toString());
+    assertEquals(6, t.startOffset());
+    assertEquals(22, t.endOffset());
+    assertEquals("word", t.type());
     assertEquals(7, t.getFlags());
 
     t = new Token(6, 22, "junk");
     t.setTermBuffer(content, 0, content.length);
     assertEquals("hello", t.term());
-    assertEquals("(hello,6,22,type=junk)", t.toString());
+    assertEquals("hello", t.toString());
+    assertEquals(6, t.startOffset());
+    assertEquals(22, t.endOffset());
+    assertEquals("junk", t.type());
     assertEquals(0, t.getFlags());
   }
 
@@ -142,10 +152,10 @@
     char[] b = {'a', 'l', 'o', 'h', 'a'};
     Token t = new Token("", 0, 5);
     t.setTermBuffer(b, 0, 5);
-    assertEquals("(aloha,0,5)", t.toString());
+    assertEquals("aloha", t.toString());
 
     t.setTermBuffer("hi there");
-    assertEquals("(hi there,0,5)", t.toString());
+    assertEquals("hi there", t.toString());
   }
 
   public void testTermBufferEquals() throws Exception {
     Token t1a = new Token();
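For reference, below is a minimal sketch of how a consumer would use the CharTermAttribute API that this patch documents, assuming a Lucene 3.1 classpath. The class name, the field name "content", the sample text, and the choice of WhitespaceAnalyzer with Version.LUCENE_31 are illustrative only and not part of the patch; any TokenStream is consumed the same way.

import java.io.StringReader;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class CharTermAttributeDemo {
  public static void main(String[] args) throws Exception {
    // Illustrative analyzer, field name and text.
    TokenStream ts = new WhitespaceAnalyzer(Version.LUCENE_31)
        .tokenStream("content", new StringReader("hello token stream"));
    // CharTermAttribute replaces the deprecated TermAttribute; per the
    // CHANGES.txt entry, both views share the same underlying char[] buffer.
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    Pattern lowercase = Pattern.compile("[a-z]+");
    ts.reset();
    while (ts.incrementToken()) {
      // As of 3.1, toString() returns only the term text, e.g. "hello".
      System.out.println(termAtt.toString());
      // CharTermAttribute implements CharSequence, so the term can be handed
      // to the regex engine without first copying it into a String.
      System.out.println("  lowercase only: " + lowercase.matcher(termAtt).matches());
    }
    ts.end();
    ts.close();
  }
}

Because the change is backwards compatible, an older TokenFilter in the same chain that still calls addAttribute(TermAttribute.class) would operate on the same buffer as the code above, independent of which interface it uses.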