Index: src/java/org/apache/lucene/analysis/CharacterCache.java =================================================================== --- src/java/org/apache/lucene/analysis/CharacterCache.java (revision 826389) +++ src/java/org/apache/lucene/analysis/CharacterCache.java (working copy) @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.analysis; - -/** - * Replacement for Java 1.5 Character.valueOf() - * @deprecated Move to Character.valueOf() in 3.0 - */ -class CharacterCache { - - private static final Character cache[] = new Character[128]; - - static { - for (int i = 0; i < cache.length; i++) { - cache[i] = new Character((char) i); - } - } - - /** - * Returns a Character instance representing the given char value - * - * @param c - * a char value - * @return a Character representation of the given char value. - */ - public static Character valueOf(char c) { - if (c < cache.length) { - return cache[(int) c]; - } - return new Character(c); - } -} Index: src/java/org/apache/lucene/analysis/MappingCharFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/MappingCharFilter.java (revision 826389) +++ src/java/org/apache/lucene/analysis/MappingCharFilter.java (working copy) @@ -30,8 +30,7 @@ public class MappingCharFilter extends BaseCharFilter { private final NormalizeCharMap normMap; - //private LinkedList buffer; - private LinkedList buffer; + private LinkedList buffer; private String replacement; private int charPointer; private int nextCharCounter; @@ -57,7 +56,7 @@ int firstChar = nextChar(); if (firstChar == -1) return -1; NormalizeCharMap nm = normMap.submap != null ? - (NormalizeCharMap)normMap.submap.get(CharacterCache.valueOf((char) firstChar)) : null; + normMap.submap.get(Character.valueOf((char) firstChar)) : null; if (nm == null) return firstChar; NormalizeCharMap result = match(nm); if (result == null) return firstChar; @@ -78,7 +77,7 @@ private int nextChar() throws IOException { nextCharCounter++; if (buffer != null && !buffer.isEmpty()) { - return ((Character)buffer.removeFirst()).charValue(); + return buffer.removeFirst().charValue(); } return input.read(); } @@ -86,15 +85,15 @@ private void pushChar(int c) { nextCharCounter--; if(buffer == null) - buffer = new LinkedList(); - buffer.addFirst(new Character((char) c)); + buffer = new LinkedList(); + buffer.addFirst(Character.valueOf((char) c)); } private void pushLastChar(int c) { if (buffer == null) { - buffer = new LinkedList(); + buffer = new LinkedList(); } - buffer.addLast(new Character((char) c)); + buffer.addLast(Character.valueOf((char) c)); } private NormalizeCharMap match(NormalizeCharMap map) throws IOException { @@ -102,7 +101,7 @@ if (map.submap != null) { int chr = nextChar(); if (chr != -1) { - NormalizeCharMap subMap = (NormalizeCharMap) map.submap.get(CharacterCache.valueOf((char) chr)); + NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr)); if (subMap != null) { result = match(subMap); } Index: src/java/org/apache/lucene/analysis/NormalizeCharMap.java =================================================================== --- src/java/org/apache/lucene/analysis/NormalizeCharMap.java (revision 826389) +++ src/java/org/apache/lucene/analysis/NormalizeCharMap.java (working copy) @@ -45,10 +45,10 @@ if (currMap.submap == null) { currMap.submap = new HashMap(1); } - NormalizeCharMap map = currMap.submap.get(CharacterCache.valueOf(c)); + NormalizeCharMap map = currMap.submap.get(Character.valueOf(c)); if (map == null) { map = new NormalizeCharMap(); - currMap.submap.put(new Character(c), map); + currMap.submap.put(Character.valueOf(c), map); } currMap = map; } Index: src/java/org/apache/lucene/analysis/Token.java =================================================================== --- src/java/org/apache/lucene/analysis/Token.java (revision 826389) +++ src/java/org/apache/lucene/analysis/Token.java (working copy) @@ -58,18 +58,6 @@ to easily switch from the old to the new TokenStream API.

- -

NOTE: As of 2.3, Token stores the term text - internally as a malleable char[] termBuffer instead of - String termText. The indexing code and core tokenizers - have been changed to re-use a single Token instance, changing - its buffer and other fields in-place as the Token is - processed. This provides substantially better indexing - performance as it saves the GC cost of new'ing a Token and - String for every term. The APIs that accept String - termText are still available but a warning about the - associated performance cost has been added (below). The - {@link #termText()} method has been deprecated.

Tokenizers and filters should try to re-use a Token instance when possible for best performance, by @@ -135,61 +123,13 @@ private static int MIN_BUFFER_SIZE = 10; - /** @deprecated We will remove this when we remove the - * deprecated APIs */ - private String termText; - - /** - * Characters for the term text. - * @deprecated This will be made private. Instead, use: - * {@link #termBuffer()}, - * {@link #setTermBuffer(char[], int, int)}, - * {@link #setTermBuffer(String)}, or - * {@link #setTermBuffer(String, int, int)} - */ - char[] termBuffer; - - /** - * Length of term text in the buffer. - * @deprecated This will be made private. Instead, use: - * {@link #termLength()}, or @{link setTermLength(int)}. - */ - int termLength; - - /** - * Start in source text. - * @deprecated This will be made private. Instead, use: - * {@link #startOffset()}, or @{link setStartOffset(int)}. - */ - int startOffset; - - /** - * End in source text. - * @deprecated This will be made private. Instead, use: - * {@link #endOffset()}, or @{link setEndOffset(int)}. - */ - int endOffset; - - /** - * The lexical type of the token. - * @deprecated This will be made private. Instead, use: - * {@link #type()}, or @{link setType(String)}. - */ - String type = DEFAULT_TYPE; - + private char[] termBuffer; + private int termLength; + private int startOffset,endOffset; + private String type = DEFAULT_TYPE; private int flags; - - /** - * @deprecated This will be made private. Instead, use: - * {@link #getPayload()}, or @{link setPayload(Payload)}. - */ - Payload payload; - - /** - * @deprecated This will be made private. Instead, use: - * {@link #getPositionIncrement()}, or @{link setPositionIncrement(String)}. - */ - int positionIncrement = 1; + private Payload payload; + private int positionIncrement = 1; /** Constructs a Token will null text. */ public Token() { @@ -236,10 +176,9 @@ * @param text term text * @param start start offset * @param end end offset - * @deprecated Use {@link #Token(char[], int, int, int, int)} instead. */ public Token(String text, int start, int end) { - termText = text; + setTermBuffer(text); startOffset = start; endOffset = end; } @@ -252,10 +191,9 @@ * @param start start offset * @param end end offset * @param typ token type - * @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setType(String)} instead. */ public Token(String text, int start, int end, String typ) { - termText = text; + setTermBuffer(text); startOffset = start; endOffset = end; type = typ; @@ -270,10 +208,9 @@ * @param start * @param end * @param flags token type bits - * @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setFlags(int)} instead. */ public Token(String text, int start, int end, int flags) { - termText = text; + setTermBuffer(text); startOffset = start; endOffset = end; this.flags = flags; @@ -335,34 +272,8 @@ return positionIncrement; } - /** Sets the Token's term text. NOTE: for better - * indexing speed you should instead use the char[] - * termBuffer methods to set the term text. - * @deprecated use {@link #setTermBuffer(char[], int, int)} or - * {@link #setTermBuffer(String)} or - * {@link #setTermBuffer(String, int, int)}. - */ - public void setTermText(String text) { - termText = text; - termBuffer = null; - } - /** Returns the Token's term text. * - * @deprecated This method now has a performance penalty - * because the text is stored internally in a char[]. If - * possible, use {@link #termBuffer()} and {@link - * #termLength()} directly instead. If you really need a - * String, use {@link #term()} - */ - public final String termText() { - if (termText == null && termBuffer != null) - termText = new String(termBuffer, 0, termLength); - return termText; - } - - /** Returns the Token's term text. - * * This method has a performance penalty * because the text is stored internally in a char[]. If * possible, use {@link #termBuffer()} and {@link @@ -371,8 +282,6 @@ * a convenience call to new String(token.termBuffer(), 0, token.termLength()) */ public final String term() { - if (termText != null) - return termText; initTermBuffer(); return new String(termBuffer, 0, termLength); } @@ -384,7 +293,6 @@ * @param length the number of characters to copy */ public final void setTermBuffer(char[] buffer, int offset, int length) { - termText = null; growTermBuffer(length); System.arraycopy(buffer, offset, termBuffer, 0, length); termLength = length; @@ -394,7 +302,6 @@ * @param buffer the buffer to copy */ public final void setTermBuffer(String buffer) { - termText = null; final int length = buffer.length(); growTermBuffer(length); buffer.getChars(0, length, termBuffer, 0); @@ -410,7 +317,6 @@ public final void setTermBuffer(String buffer, int offset, int length) { assert offset <= buffer.length(); assert offset + length <= buffer.length(); - termText = null; growTermBuffer(length); buffer.getChars(offset, offset + length, termBuffer, 0); termLength = length; @@ -441,17 +347,7 @@ public char[] resizeTermBuffer(int newSize) { if (termBuffer == null) { // The buffer is always at least MIN_BUFFER_SIZE - newSize = newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize; - //Preserve termText - if (termText != null) { - final int ttLen = termText.length(); - newSize = newSize < ttLen ? ttLen : newSize; - termBuffer = new char[ArrayUtil.getNextSize(newSize)]; - termText.getChars(0, termText.length(), termBuffer, 0); - termText = null; - } else { // no term Text, the first allocation - termBuffer = new char[ArrayUtil.getNextSize(newSize)]; - } + termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)]; } else { if(termBuffer.length < newSize){ // Not big enough; create a new array with slight @@ -481,25 +377,10 @@ } } - - // TODO: once we remove the deprecated termText() method - // and switch entirely to char[] termBuffer we don't need - // to use this method anymore, only for late init of the buffer private void initTermBuffer() { if (termBuffer == null) { - if (termText == null) { - termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)]; - termLength = 0; - } else { - int length = termText.length(); - if (length < MIN_BUFFER_SIZE) length = MIN_BUFFER_SIZE; - termBuffer = new char[ArrayUtil.getNextSize(length)]; - termLength = termText.length(); - termText.getChars(0, termText.length(), termBuffer, 0); - termText = null; - } - } else { - termText = null; + termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)]; + termLength = 0; } } @@ -528,7 +409,7 @@ corresponding to this token in the source text. Note that the difference between endOffset() and startOffset() may not be - equal to termText.length(), as the term text may have been altered by a + equal to {@link #termLength}, as the term text may have been altered by a stemmer or some other filter. */ public final int startOffset() { return startOffset; @@ -630,7 +511,6 @@ payload = null; // Leave termBuffer to allow re-use termLength = 0; - termText = null; positionIncrement = 1; flags = 0; startOffset = endOffset = 0; Index: src/test/org/apache/lucene/analysis/TestToken.java =================================================================== --- src/test/org/apache/lucene/analysis/TestToken.java (revision 826389) +++ src/test/org/apache/lucene/analysis/TestToken.java (working copy) @@ -150,7 +150,7 @@ t.setTermBuffer(b, 0, 5); assertEquals("(aloha,0,5)", t.toString()); - t.setTermText("hi there"); + t.setTermBuffer("hi there"); assertEquals("(hi there,0,5)", t.toString()); } @@ -171,20 +171,17 @@ public void testMixedStringArray() throws Exception { Token t = new Token("hello", 0, 5); - assertEquals(t.termText(), "hello"); assertEquals(t.termLength(), 5); assertEquals(t.term(), "hello"); - t.setTermText("hello2"); + t.setTermBuffer("hello2"); assertEquals(t.termLength(), 6); assertEquals(t.term(), "hello2"); t.setTermBuffer("hello3".toCharArray(), 0, 6); - assertEquals(t.termText(), "hello3"); + assertEquals(t.term(), "hello3"); - // Make sure if we get the buffer and change a character - // that termText() reflects the change char[] buffer = t.termBuffer(); buffer[1] = 'o'; - assertEquals(t.termText(), "hollo3"); + assertEquals(t.term(), "hollo3"); } public void testClone() throws Exception {