Index: src/java/org/apache/lucene/analysis/Token.java =================================================================== --- src/java/org/apache/lucene/analysis/Token.java (revision 920388) +++ src/java/org/apache/lucene/analysis/Token.java (working copy) @@ -351,7 +351,7 @@ @Override public String toString() { final StringBuilder sb = new StringBuilder(); - sb.append('(').append(term()).append(',') + sb.append('(').append(super.toString()).append(',') .append(startOffset).append(',').append(endOffset); if (!"word".equals(type)) sb.append(",type=").append(type); Index: src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java (revision 0) @@ -0,0 +1,71 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Attribute; + +/** + * The term text of a Token. 
+ */ +public interface CharTermAttribute extends Attribute, CharSequence, Appendable { + + /** Copies the contents of buffer, starting at offset for + * length characters, into the termBuffer array. + * @param buffer the buffer to copy + * @param offset the index in the buffer of the first character to copy + * @param length the number of characters to copy + */ + public void copyBuffer(char[] buffer, int offset, int length); + + /** Returns the internal termBuffer character array which + * you can then directly alter. If the array is too + * small for your token, use {@link + * #resizeBuffer(int)} to increase it. After + * altering the buffer be sure to call {@link + * #setLength} to record the number of valid + * characters that were placed into the termBuffer. */ + public char[] buffer(); + + /** Grows the termBuffer to at least size newSize, preserving the + * existing content. + * @param newSize minimum size of the new termBuffer + * @return the termBuffer (reallocated only if it was too small), with length >= newSize + */ + public char[] resizeBuffer(int newSize); + + /** Sets the number of valid characters (length of the term) in + * the termBuffer array. Use this to truncate the termBuffer + * or to synchronize with external manipulation of the termBuffer. + * Note: to grow the size of the array, + * use {@link #resizeBuffer(int)} first. + * @param length the new number of valid characters; must not exceed the size of the termBuffer + */ + public CharTermAttribute setLength(int length); + + /** Sets the length of the term to zero; the termBuffer itself is kept. + * Use this method before appending contents + * using the {@link Appendable} interface. 
+ */ + public CharTermAttribute setEmpty(); + + // the following methods are redefined to get rid of IOException declaration: + public CharTermAttribute append(CharSequence csq); + public CharTermAttribute append(CharSequence csq, int start, int end); + public CharTermAttribute append(char c); + +} Property changes on: src\java\org\apache\lucene\analysis\tokenattributes\CharTermAttribute.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (revision 0) +++ src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (revision 0) @@ -0,0 +1,274 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Serializable; +import java.nio.CharBuffer; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.UnicodeUtil; + +/** + * The term text of a Token. + */ +public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermAttribute, TermToBytesRefAttribute, Cloneable, Serializable { + private static int MIN_BUFFER_SIZE = 10; + + private char[] termBuffer; + private int termLength; + + @Deprecated + public String term() { + // don't delegate to toString() here! + initTermBuffer(); + return new String(termBuffer, 0, termLength); + } + + public void copyBuffer(char[] buffer, int offset, int length) { + growTermBuffer(length); + System.arraycopy(buffer, offset, termBuffer, 0, length); + termLength = length; + } + + @Deprecated + public void setTermBuffer(char[] buffer, int offset, int length) { + copyBuffer(buffer, offset, length); + } + + @Deprecated + public void setTermBuffer(String buffer) { + int length = buffer.length(); + growTermBuffer(length); + buffer.getChars(0, length, termBuffer, 0); + termLength = length; + } + + @Deprecated + public void setTermBuffer(String buffer, int offset, int length) { + assert offset <= buffer.length(); + assert offset + length <= buffer.length(); + growTermBuffer(length); + buffer.getChars(offset, offset + length, termBuffer, 0); + termLength = length; + } + + public char[] buffer() { + initTermBuffer(); + return termBuffer; + } + + @Deprecated + public char[] termBuffer() { + return buffer(); + } + + public char[] resizeBuffer(int newSize) { + if (termBuffer == null) { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? 
MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; + } else { + if(termBuffer.length < newSize){ + // Not big enough; create a new array with slight + // over allocation and preserve content + final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; + System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length); + termBuffer = newCharBuffer; + } + } + return termBuffer; + } + + @Deprecated + public char[] resizeTermBuffer(int newSize) { + return resizeBuffer(newSize); + } + + private void growTermBuffer(int newSize) { + if (termBuffer == null) { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; + } else { + if(termBuffer.length < newSize){ + // Not big enough; create a new array with slight + // over allocation: + termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; + } + } + } + + private void initTermBuffer() { + if (termBuffer == null) { + termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)]; + termLength = 0; + } + } + + @Deprecated + public int termLength() { + return termLength; + } + + public CharTermAttribute setLength(int length) { + initTermBuffer(); + if (length > termBuffer.length) + throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")"); + termLength = length; + return this; + } + + public CharTermAttribute setEmpty() { + termLength = 0; + return this; + } + + @Deprecated + public void setTermLength(int length) { + setLength(length); + } + + // *** TermToBytesRefAttribute interface *** + public int toBytesRef(BytesRef target) { + // noncommit: Maybe assume that bytes is already initialized? TermsHashPerField ensure this. 
+ if (target.bytes == null) { + target.bytes = new byte[termLength * 4]; + } + return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, target); + } + + // *** CharSequence interface *** + public int length() { + return termLength; + } + + public char charAt(int index) { + if (index >= termLength) + throw new IndexOutOfBoundsException(); + initTermBuffer(); + return termBuffer[index]; + } + + public CharSequence subSequence(final int start, final int end) { + if (start > termLength || end > termLength || start > end || start < 0 || end < 0) + throw new IndexOutOfBoundsException(); + initTermBuffer(); + return new String(termBuffer, start, end - start); + } + + // *** Appendable interface *** + public CharTermAttribute append(CharSequence csq) { + return append(csq, 0, csq.length()); + } + + public CharTermAttribute append(CharSequence csq, int start, int end) { + resizeBuffer(termLength + end - start); + if (csq instanceof String) { + ((String) csq).getChars(start, end, termBuffer, termLength); + } else if (csq instanceof StringBuilder) { + ((StringBuilder) csq).getChars(start, end, termBuffer, termLength); + } else if (csq instanceof StringBuffer) { + ((StringBuffer) csq).getChars(start, end, termBuffer, termLength); + } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) { + final CharBuffer cb = (CharBuffer) csq; + System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, end - start); + } else { + while (start < end) + termBuffer[termLength++] = csq.charAt(start++); + // no fall-through here, as termLength is updated! 
+ return this; + } + termLength += end - start; + return this; + } + + public CharTermAttribute append(char c) { + resizeBuffer(termLength + 1)[termLength++] = c; + return this; + } + + // *** AttributeImpl *** + + @Override + public int hashCode() { + initTermBuffer(); + int code = termLength; + code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength); + return code; + } + + @Override + public void clear() { + termLength = 0; + } + + @Override + public Object clone() { + CharTermAttributeImpl t = (CharTermAttributeImpl)super.clone(); + // Do a deep clone + if (termBuffer != null) { + t.termBuffer = termBuffer.clone(); + } + return t; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof CharTermAttributeImpl) { + initTermBuffer(); + CharTermAttributeImpl o = ((CharTermAttributeImpl) other); + o.initTermBuffer(); + + if (termLength != o.termLength) + return false; + for(int i=0;i> it = clone.getAttributeClassesIterator(); - assertEquals("TermAttribute must be the first attribute", TermAttribute.class, it.next()); + assertEquals("FlagsAttribute must be the first attribute", FlagsAttribute.class, it.next()); assertEquals("TypeAttribute must be the second attribute", TypeAttribute.class, it.next()); assertFalse("No more attributes", it.hasNext()); - final TermAttribute termAtt2 = clone.getAttribute(TermAttribute.class); + final FlagsAttribute termAtt2 = clone.getAttribute(FlagsAttribute.class); final TypeAttribute typeAtt2 = clone.getAttribute(TypeAttribute.class); - assertNotSame("TermAttribute of original and clone must be different instances", termAtt2, termAtt); + assertNotSame("FlagsAttribute of original and clone must be different instances", termAtt2, termAtt); assertNotSame("TypeAttribute of original and clone must be different instances", typeAtt2, typeAtt); - assertEquals("TermAttribute of original and clone must be equal", termAtt2, termAtt); + assertEquals("FlagsAttribute of 
original and clone must be equal", termAtt2, termAtt); assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt); } public void testToStringAndMultiAttributeImplementations() { AttributeSource src = new AttributeSource(); - TermAttribute termAtt = src.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class); - termAtt.setTermBuffer("TestTerm"); + termAtt.append("TestTerm"); typeAtt.setType("TestType"); assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString()); Iterator it = src.getAttributeImplsIterator(); @@ -113,23 +113,23 @@ src = new AttributeSource(); src.addAttributeImpl(new Token()); - // this should not add a new attribute as Token implements TermAttribute, too - termAtt = src.addAttribute(TermAttribute.class); - assertTrue("TermAttribute should be implemented by Token", termAtt instanceof Token); + // this should not add a new attribute as Token implements CharTermAttribute, too + termAtt = src.addAttribute(CharTermAttribute.class); + assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token); // get the Token attribute and check, that it is the only one it = src.getAttributeImplsIterator(); Token tok = (Token) it.next(); assertFalse("There should be only one attribute implementation instance", it.hasNext()); - termAtt.setTermBuffer("TestTerm"); + termAtt.setEmpty().append("TestTerm"); assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString()); } public void testDefaultAttributeFactory() throws Exception { AttributeSource src = new AttributeSource(); - assertTrue("TermAttribute is not implemented by TermAttributeImpl", - src.addAttribute(TermAttribute.class) instanceof TermAttributeImpl); + assertTrue("CharTermAttribute is not implemented by CharTermAttributeImpl", + 
src.addAttribute(CharTermAttribute.class) instanceof CharTermAttributeImpl); assertTrue("OffsetAttribute is not implemented by OffsetAttributeImpl", src.addAttribute(OffsetAttribute.class) instanceof OffsetAttributeImpl); assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl",