Index: backwards/src/test/org/apache/lucene/analysis/TestToken.java =================================================================== --- backwards/src/test/org/apache/lucene/analysis/TestToken.java (revision 932329) +++ backwards/src/test/org/apache/lucene/analysis/TestToken.java (working copy) @@ -31,6 +31,7 @@ super(name); } + /* toString changed in 3.1: public void testCtor() throws Exception { Token t = new Token(); char[] content = "hello".toCharArray(); @@ -60,6 +61,7 @@ assertEquals("(hello,6,22,type=junk)", t.toString()); assertEquals(0, t.getFlags()); } + */ public void testResize() { Token t = new Token(); @@ -139,6 +141,7 @@ assertEquals(20000, t.termLength()); } + /* toString changed in 3.1: public void testToString() throws Exception { char[] b = {'a', 'l', 'o', 'h', 'a'}; Token t = new Token("", 0, 5); @@ -148,6 +151,7 @@ t.setTermBuffer("hi there"); assertEquals("(hi there,0,5)", t.toString()); } + */ public void testTermBufferEquals() throws Exception { Token t1a = new Token(); Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 932329) +++ CHANGES.txt (working copy) @@ -94,6 +94,13 @@ FSDirectory.FSIndexInput. Anyone extending this class will have to fix their code on upgrading. (Earwin Burrfoot via Mike McCandless) +* LUCENE-2302: The new interface for term attributes, CharTermAttribute, + now implements CharSequence. This requires the toString() methods of + CharTermAttribute, deprecated TermAttribute, and Token to return only + the term text and no other attribute contents. + TODO: Point to new attribute inspection API coming with LUCENE-2374. + (Uwe Schindler, Robert Muir) + Changes in runtime behavior * LUCENE-1923: Made IndexReader.toString() produce something @@ -186,6 +193,17 @@ deleted docs (getDeletedDocs), providing a new Bits interface to directly query by doc ID. +* LUCENE-2302: Deprecated TermAttribute and replaced by a new + CharTermAttribute. 
The change is backwards compatible, so + mixed new/old TokenStreams all work on the same char[] buffer + independent of which interface they use. CharTermAttribute + has shorter method names and implements CharSequence and + Appendable. This allows usage like Java's StringBuilder in + addition to direct char[] access. Also terms can directly be + used in places where CharSequence is allowed (e.g. regular + expressions). + (Uwe Schindler, Robert Muir) + Bug fixes * LUCENE-2119: Don't throw NegativeArraySizeException if you pass Index: src/java/org/apache/lucene/analysis/Token.java =================================================================== --- src/java/org/apache/lucene/analysis/Token.java (revision 932329) +++ src/java/org/apache/lucene/analysis/Token.java (working copy) @@ -112,10 +112,14 @@
+ Please note: With Lucene 3.1, the {@linkplain #toString toString()} method had to be changed to match the
+ {@link CharSequence} interface introduced by {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
+ This method now returns only the term text and no additional information.
+
This method changed the behavior with Lucene 3.1, + * before it returned a String representation of the whole + * term with all attributes. + * This affects especially the {@link Token} subclass. + */ @Override public String toString() { return new String(termBuffer, 0, termLength); Index: src/test/org/apache/lucene/analysis/TestToken.java =================================================================== --- src/test/org/apache/lucene/analysis/TestToken.java (revision 932329) +++ src/test/org/apache/lucene/analysis/TestToken.java (working copy) @@ -36,6 +36,8 @@ char[] content = "hello".toCharArray(); t.setTermBuffer(content, 0, content.length); assertNotSame(t.termBuffer(), content); + assertEquals(0, t.startOffset()); + assertEquals(0, t.endOffset()); assertEquals("hello", t.term()); assertEquals("word", t.type()); assertEquals(0, t.getFlags()); @@ -43,20 +45,28 @@ t = new Token(6, 22); t.setTermBuffer(content, 0, content.length); assertEquals("hello", t.term()); - assertEquals("(hello,6,22)", t.toString()); + assertEquals("hello", t.toString()); + assertEquals(6, t.startOffset()); + assertEquals(22, t.endOffset()); assertEquals("word", t.type()); assertEquals(0, t.getFlags()); t = new Token(6, 22, 7); t.setTermBuffer(content, 0, content.length); assertEquals("hello", t.term()); - assertEquals("(hello,6,22)", t.toString()); + assertEquals("hello", t.toString()); + assertEquals(6, t.startOffset()); + assertEquals(22, t.endOffset()); + assertEquals("word", t.type()); assertEquals(7, t.getFlags()); t = new Token(6, 22, "junk"); t.setTermBuffer(content, 0, content.length); assertEquals("hello", t.term()); - assertEquals("(hello,6,22,type=junk)", t.toString()); + assertEquals("hello", t.toString()); + assertEquals(6, t.startOffset()); + assertEquals(22, t.endOffset()); + assertEquals("junk", t.type()); assertEquals(0, t.getFlags()); } @@ -142,10 +152,10 @@ char[] b = {'a', 'l', 'o', 'h', 'a'}; Token t = new Token("", 0, 5); t.setTermBuffer(b, 0, 5); - 
assertEquals("(aloha,0,5)", t.toString()); + assertEquals("aloha", t.toString()); t.setTermBuffer("hi there"); - assertEquals("(hi there,0,5)", t.toString()); + assertEquals("hi there", t.toString()); } public void testTermBufferEquals() throws Exception {