Index: src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java	(working copy)
@@ -50,11 +50,6 @@
  *
  *
  * See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
- *
- * The set of character conversions supported by this class is a superset of
- * those supported by Lucene's {@link ISOLatin1AccentFilter} which strips
- * accents from Latin1 characters. For example, 'à' will be replaced by
- * 'a'.
  */
 public final class ASCIIFoldingFilter extends TokenFilter {
   public ASCIIFoldingFilter(TokenStream input)
Index: src/java/org/apache/lucene/analysis/CachingTokenFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/CachingTokenFilter.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/CachingTokenFilter.java	(working copy)
@@ -33,7 +33,7 @@
  * {@link TokenStream#reset()}, which repositions the
  * stream to the first Token.
  */
-public class CachingTokenFilter extends TokenFilter {
+public final class CachingTokenFilter extends TokenFilter {
   private List cache = null;
   private Iterator iterator = null;
   private AttributeSource.State finalState;
@@ -41,18 +41,6 @@
   public CachingTokenFilter(TokenStream input) {
     super(input);
   }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws IOException {
-    return super.next();
-  }
 
   public final boolean incrementToken() throws IOException {
     if (cache == null) {
Index: src/java/org/apache/lucene/analysis/CharTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/CharTokenizer.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/CharTokenizer.java	(working copy)
@@ -114,18 +114,6 @@
     offsetAtt.setOffset(finalOffset, finalOffset);
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws IOException {
-    return super.next();
-  }
-
   public void reset(Reader input) throws IOException {
     super.reset(input);
     bufferIndex = 0;
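
A minimal sketch of consuming the now-final CachingTokenFilter twice under the new
API. The WhitespaceTokenizer source and the literal text are illustrative only, not
part of this patch:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class CachingExample {
  public static void main(String[] args) throws IOException {
    TokenStream source = new WhitespaceTokenizer(new StringReader("one two three"));
    TokenStream cached = new CachingTokenFilter(source);
    TermAttribute termAtt = cached.addAttribute(TermAttribute.class);

    // First pass: tokens are pulled from the wrapped stream and their states cached.
    while (cached.incrementToken()) {
      System.out.println("pass 1: " + termAtt.term());
    }
    // reset() repositions to the first cached state; the source is not re-read.
    cached.reset();
    while (cached.incrementToken()) {
      System.out.println("pass 2: " + termAtt.term());
    }
  }
}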
Index: src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java	(working copy)
@@ -1,269 +0,0 @@
-package org.apache.lucene.analysis;
-
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A filter that replaces accented characters in the ISO Latin 1 character set
- * (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
- * <p>
- * For instance, 'à' will be replaced by 'a'.
- * <p>
- *
- * @deprecated in favor of {@link ASCIIFoldingFilter} which covers a superset
- * of Latin 1. This class will be removed in Lucene 3.0.
- */
-public class ISOLatin1AccentFilter extends TokenFilter {
-  public ISOLatin1AccentFilter(TokenStream input) {
-    super(input);
-    termAtt = addAttribute(TermAttribute.class);
-  }
-
-  private char[] output = new char[256];
-  private int outputPos;
-  private TermAttribute termAtt;
-
-  public final boolean incrementToken() throws java.io.IOException {
-    if (input.incrementToken()) {
-      final char[] buffer = termAtt.termBuffer();
-      final int length = termAtt.termLength();
-      // If no characters actually require rewriting then we
-      // just return token as-is:
-      for(int i=0;i<length;i++) {
-        final char c = buffer[i];
-        if (c >= '\u00c0' && c <= '\uFB06') {
-          removeAccents(buffer, length);
-          termAtt.setTermBuffer(output, 0, outputPos);
-          break;
-        }
-      }
-      return true;
-    } else
-      return false;
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
-  /**
-   * To replace accented characters in a String by unaccented equivalents.
-   */
-  public final void removeAccents(char[] input, int length) {
-
-    // Worst-case length required:
-    final int maxSizeNeeded = 2*length;
-
-    int size = output.length;
-    while (size < maxSizeNeeded)
-      size *= 2;
-
-    if (size != output.length)
-      output = new char[size];
-
-    outputPos = 0;
-
-    int pos = 0;
-
-    for (int i=0; i<length; i++, pos++) {
-      final char c = input[pos];
-
-      // Quick test: if it's not in range then just keep current character
-      if (c < '\u00C0' || c > '\uFB06')
-        output[outputPos++] = c;
-      else {
-        switch (c) {
-          case '\u00C0' : // À
-          case '\u00C1' : // Á
-          case '\u00C2' : // Â
-          case '\u00C3' : // Ã
-          case '\u00C4' : // Ä
-          case '\u00C5' : // Å
-            output[outputPos++] = 'A';
-            break;
-          case '\u00C6' : // Æ
-            output[outputPos++] = 'A';
-            output[outputPos++] = 'E';
-            break;
-          case '\u00C7' : // Ç
-            output[outputPos++] = 'C';
-            break;
-          case '\u00C8' : // È
-          case '\u00C9' : // É
-          case '\u00CA' : // Ê
-          case '\u00CB' : // Ë
-            output[outputPos++] = 'E';
-            break;
-          case '\u00CC' : // Ì
-          case '\u00CD' : // Í
-          case '\u00CE' : // Î
-          case '\u00CF' : // Ï
-            output[outputPos++] = 'I';
-            break;
-          case '\u0132' : // IJ
-            output[outputPos++] = 'I';
-            output[outputPos++] = 'J';
-            break;
-          case '\u00D0' : // Ð
-            output[outputPos++] = 'D';
-            break;
-          case '\u00D1' : // Ñ
-            output[outputPos++] = 'N';
-            break;
-          case '\u00D2' : // Ò
-          case '\u00D3' : // Ó
-          case '\u00D4' : // Ô
-          case '\u00D5' : // Õ
-          case '\u00D6' : // Ö
-          case '\u00D8' : // Ø
-            output[outputPos++] = 'O';
-            break;
-          case '\u0152' : // Œ
-            output[outputPos++] = 'O';
-            output[outputPos++] = 'E';
-            break;
-          case '\u00DE' : // Þ
-            output[outputPos++] = 'T';
-            output[outputPos++] = 'H';
-            break;
-          case '\u00D9' : // Ù
-          case '\u00DA' : // Ú
-          case '\u00DB' : // Û
-          case '\u00DC' : // Ü
-            output[outputPos++] = 'U';
-            break;
-          case '\u00DD' : // Ý
-          case '\u0178' : // Ÿ
-            output[outputPos++] = 'Y';
-            break;
-          case '\u00E0' : // à
-          case '\u00E1' : // á
-          case '\u00E2' : // â
-          case '\u00E3' : // ã
-          case '\u00E4' : // ä
-          case '\u00E5' : // å
-            output[outputPos++] = 'a';
-            break;
-          case '\u00E6' : // æ
-            output[outputPos++] = 'a';
-            output[outputPos++] = 'e';
-            break;
-          case '\u00E7' : // ç
-            output[outputPos++] = 'c';
-            break;
-          case '\u00E8' : // è
-          case '\u00E9' : // é
-          case '\u00EA' : // ê
-          case '\u00EB' : // ë
-            output[outputPos++] = 'e';
-            break;
-          case '\u00EC' : // ì
-          case '\u00ED' : // í
-          case '\u00EE' : // î
-          case '\u00EF' : // ï
-            output[outputPos++] = 'i';
-            break;
-          case '\u0133' : // ij
-            output[outputPos++] = 'i';
-            output[outputPos++] = 'j';
-            break;
-          case '\u00F0' : // ð
-            output[outputPos++] = 'd';
-            break;
-          case '\u00F1' : // ñ
-            output[outputPos++] = 'n';
-            break;
-          case '\u00F2' : // ò
-          case '\u00F3' : // ó
-          case '\u00F4' : // ô
-          case '\u00F5' : // õ
-          case '\u00F6' : // ö
-          case '\u00F8' : // ø
-            output[outputPos++] = 'o';
-            break;
-          case '\u0153' : // œ
-            output[outputPos++] = 'o';
-            output[outputPos++] = 'e';
-            break;
-          case '\u00DF' : // ß
-            output[outputPos++] = 's';
-            output[outputPos++] = 's';
-            break;
-          case '\u00FE' : // þ
-            output[outputPos++] = 't';
-            output[outputPos++] = 'h';
-            break;
-          case '\u00F9' : // ù
-          case '\u00FA' : // ú
-          case '\u00FB' : // û
-          case '\u00FC' : // ü
-            output[outputPos++] = 'u';
-            break;
-          case '\u00FD' : // ý
-          case '\u00FF' : // ÿ
-            output[outputPos++] = 'y';
-            break;
-          case '\uFB00': // ff
-            output[outputPos++] = 'f';
-            output[outputPos++] = 'f';
-            break;
-          case '\uFB01': // fi
-            output[outputPos++] = 'f';
-            output[outputPos++] = 'i';
-            break;
-          case '\uFB02': // fl
-            output[outputPos++] = 'f';
-            output[outputPos++] = 'l';
-            break;
-          // following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
-//        case '\uFB03': // ffi
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'i';
-//            break;
-//        case '\uFB04': // ffl
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'l';
-//            break;
-          case '\uFB05': // ſt
-            output[outputPos++] = 'f';
-            output[outputPos++] = 't';
-            break;
-          case '\uFB06': // st
-            output[outputPos++] = 's';
-            output[outputPos++] = 't';
-            break;
-          default :
-            output[outputPos++] = c;
-            break;
-        }
-      }
-    }
-  }
-}
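
Call sites of the deleted ISOLatin1AccentFilter swap in ASCIIFoldingFilter at the
same position in the analysis chain; it folds the same Latin-1 range plus a much
larger part of Unicode. A hedged before/after sketch (the WhitespaceTokenizer
source and sample text are illustrative):

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class FoldingMigration {
  public static TokenStream buildChain(Reader reader) {
    // Before this patch: new ISOLatin1AccentFilter(new WhitespaceTokenizer(reader))
    // After: same position in the chain, strictly larger set of folded characters.
    return new ASCIIFoldingFilter(new WhitespaceTokenizer(reader));
  }

  public static void main(String[] args) {
    buildChain(new StringReader("déjà vu")); // 'é' and 'à' fold to 'e' and 'a'
  }
}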
Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/KeywordTokenizer.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java	(working copy)
@@ -27,7 +27,7 @@
 /**
  * Emits the entire input as a single token.
  */
-public class KeywordTokenizer extends Tokenizer {
+public final class KeywordTokenizer extends Tokenizer {
 
   private static final int DEFAULT_BUFFER_SIZE = 256;
 
@@ -88,18 +88,6 @@
     offsetAtt.setOffset(finalOffset, finalOffset);
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws IOException {
-    return super.next();
-  }
-
   public void reset(Reader input) throws IOException {
     super.reset(input);
     this.done = false;
Index: src/java/org/apache/lucene/analysis/SinkTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/SinkTokenizer.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/SinkTokenizer.java	(working copy)
@@ -1,110 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-
-/**
- * A SinkTokenizer can be used to cache Tokens for use in an Analyzer
- * <p/>
- * WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API.
- * If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers
- * the same functionality.
- * @see TeeTokenFilter
- * @deprecated Use {@link TeeSinkTokenFilter} instead
- *
- **/
-public class SinkTokenizer extends Tokenizer {
-  protected List/*<Token>*/ lst = new ArrayList/*<Token>*/();
-  protected Iterator/*<Token>*/ iter;
-
-  public SinkTokenizer(List/*<Token>*/ input) {
-    this.lst = input;
-    if (this.lst == null) this.lst = new ArrayList/*<Token>*/();
-  }
-
-  public SinkTokenizer() {
-    this.lst = new ArrayList/*<Token>*/();
-  }
-
-  public SinkTokenizer(int initCap){
-    this.lst = new ArrayList/*<Token>*/(initCap);
-  }
-
-  /**
-   * Get the tokens in the internal List.
-   * <p/>
-   * WARNING: Adding tokens to this list requires the {@link #reset()} method to be called in order for them
-   * to be made available.  Also, this Tokenizer does nothing to protect against {@link java.util.ConcurrentModificationException}s
-   * in the case of adds happening while {@link #next(org.apache.lucene.analysis.Token)} is being called.
-   * <p/>
-   * WARNING: Since this SinkTokenizer can be reset and the cached tokens made available again, do not modify them. Modify clones instead.
-   *
-   * @return A List of {@link org.apache.lucene.analysis.Token}s
-   */
-  public List/*<Token>*/ getTokens() {
-    return lst;
-  }
-
-  /**
-   * Returns the next token out of the list of cached tokens
-   * @return The next {@link org.apache.lucene.analysis.Token} in the Sink.
-   * @throws IOException
-   */
-  public Token next(final Token reusableToken) throws IOException {
-    assert reusableToken != null;
-    if (iter == null) iter = lst.iterator();
-    // Since this TokenStream can be reset we have to maintain the tokens as immutable
-    if (iter.hasNext()) {
-      Token nextToken = (Token) iter.next();
-      return (Token) nextToken.clone();
-    }
-    return null;
-  }
-
-  /**
-   * Override this method to cache only certain tokens, or new tokens based
-   * on the old tokens.
-   *
-   * @param t The {@link org.apache.lucene.analysis.Token} to add to the sink
-   */
-  public void add(Token t) {
-    if (t == null) return;
-    lst.add((Token) t.clone());
-  }
-
-  public void close() throws IOException {
-    //nothing to close
-    input = null;
-    lst = null;
-  }
-
-  /**
-   * Reset the internal data structures to the start at the front of the list of tokens.  Should be called
-   * if tokens were added to the list after an invocation of {@link #next(Token)}
-   * @throws IOException
-   */
-  public void reset() throws IOException {
-    iter = lst.iterator();
-  }
-}
-
Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java	(working copy)
@@ -45,7 +45,7 @@
  * directory to your project and maintaining your own grammar-based tokenizer.
  */
 
-public class StandardTokenizer extends Tokenizer {
+public final class StandardTokenizer extends Tokenizer {
   /** A private instance of the JFlex-constructed scanner */
   private final StandardTokenizerImpl scanner;
 
@@ -209,18 +209,6 @@
     offsetAtt.setOffset(finalOffset, finalOffset);
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws IOException {
-    return super.next();
-  }
-
   /*
    * (non-Javadoc)
    *
Index: src/java/org/apache/lucene/analysis/TeeTokenFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/TeeTokenFilter.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/TeeTokenFilter.java	(working copy)
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.io.IOException;
-
-
-/**
- * Works in conjunction with the SinkTokenizer to provide the ability to set aside tokens
- * that have already been analyzed.  This is useful in situations where multiple fields share
- * many common analysis steps and then go their separate ways.
- * <p/>
- * It is also useful for doing things like entity extraction or proper noun analysis as
- * part of the analysis workflow and saving off those tokens for use in another field.
- *
- * <pre>
-SinkTokenizer sink1 = new SinkTokenizer();
-SinkTokenizer sink2 = new SinkTokenizer();
-
-TokenStream source1 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader1), sink1), sink2);
-TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader2), sink1), sink2);
-
-TokenStream final1 = new LowerCaseFilter(source1);
-TokenStream final2 = source2;
-TokenStream final3 = new EntityDetect(sink1);
-TokenStream final4 = new URLDetect(sink2);
-
-d.add(new Field("f1", final1));
-d.add(new Field("f2", final2));
-d.add(new Field("f3", final3));
-d.add(new Field("f4", final4));
- * </pre>
- * In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
- * <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
- * and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
- * It is important, that tees are consumed before sinks (in the above example, the field names must be
- * less the sink's field names).
- * Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene
- * <p/>
- *
- * See LUCENE-1058.
- * <p/>
- * WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API.
- * If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers
- * the same functionality.
-
- * @see SinkTokenizer
- * @deprecated Use {@link TeeSinkTokenFilter} instead
- **/
-public class TeeTokenFilter extends TokenFilter {
-  SinkTokenizer sink;
-
-  public TeeTokenFilter(TokenStream input, SinkTokenizer sink) {
-    super(input);
-    this.sink = sink;
-  }
-
-  public Token next(final Token reusableToken) throws IOException {
-    assert reusableToken != null;
-    Token nextToken = input.next(reusableToken);
-    sink.add(nextToken);
-    return nextToken;
-  }
-
-}
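
The deleted example above maps onto TeeSinkTokenFilter roughly as follows. This is
a sketch against the replacement API (newSinkTokenStream() creates a sink,
addSinkTokenStream(...) attaches an existing sink to a second tee), not code from
this patch; as before, the tees must be consumed before the sinks:

import java.io.Reader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TeeSinkTokenFilter;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class TeeSinkMigration {
  public static void wire(Reader reader1, Reader reader2) {
    TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
    TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));

    // Sinks are created by one tee and can be attached to another,
    // so both readers feed both sinks, as in the old example.
    SinkTokenStream sink1 = source1.newSinkTokenStream();
    SinkTokenStream sink2 = source1.newSinkTokenStream();
    source2.addSinkTokenStream(sink1);
    source2.addSinkTokenStream(sink2);

    TokenStream final1 = new LowerCaseFilter(source1);
    TokenStream final2 = source2;
    // final3/final4 would wrap sink1 and sink2, as in the deleted javadoc example.
  }
}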
Index: src/java/org/apache/lucene/analysis/Token.java
===================================================================
--- src/java/org/apache/lucene/analysis/Token.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/Token.java	(working copy)
@@ -867,9 +867,6 @@
       if (payload !=null) {
         to.payload = (Payload) payload.clone();
       }
-      // remove the following optimization in 3.0 when old TokenStream API removed:
-    } else if (target instanceof TokenWrapper) {
-      ((TokenWrapper) target).delegate = (Token) this.clone();
     } else {
       initTermBuffer();
       ((TermAttribute) target).setTermBuffer(termBuffer, 0, termLength);
Index: src/java/org/apache/lucene/analysis/TokenStream.java
===================================================================
--- src/java/org/apache/lucene/analysis/TokenStream.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/TokenStream.java	(working copy)
@@ -18,18 +18,10 @@
  */
 
 import java.io.IOException;
-import java.util.IdentityHashMap;
 
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Payload;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
@@ -86,95 +78,11 @@
  */
 public abstract class TokenStream extends AttributeSource {
 
-  /** @deprecated Remove this when old API is removed! */
-  private static final AttributeFactory DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY
-    = new TokenWrapperAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
-
-  /** @deprecated Remove this when old API is removed! */
-  private final TokenWrapper tokenWrapper;
-
-  /** @deprecated Remove this when old API is removed! */
-  private static boolean onlyUseNewAPI = false;
-
-  /** @deprecated Remove this when old API is removed! */
-  private final MethodSupport supportedMethods = getSupportedMethods(this.getClass());
-
-  /** @deprecated Remove this when old API is removed! */
-  private static final class MethodSupport {
-    final boolean hasIncrementToken, hasReusableNext, hasNext;
-
-    MethodSupport(Class<? extends TokenStream> clazz) {
-      hasIncrementToken = isMethodOverridden(clazz, "incrementToken");
-      hasReusableNext = isMethodOverridden(clazz, "next", Token.class);
-      hasNext = isMethodOverridden(clazz, "next");
-    }
-
-    private static boolean isMethodOverridden(Class<? extends TokenStream> clazz, String name, Class... params) {
-      try {
-        return clazz.getMethod(name, params).getDeclaringClass() != TokenStream.class;
-      } catch (NoSuchMethodException e) {
-        // should not happen
-        throw new RuntimeException(e);
-      }
-    }
-  }
-
-  /** @deprecated Remove this when old API is removed! */
-  private static final IdentityHashMap<Class<? extends TokenStream>,MethodSupport> knownMethodSupport =
-    new IdentityHashMap<Class<? extends TokenStream>,MethodSupport>();
-
-  /** @deprecated Remove this when old API is removed! */
-  private static MethodSupport getSupportedMethods(Class<? extends TokenStream> clazz) {
-    MethodSupport supportedMethods;
-    synchronized(knownMethodSupport) {
-      supportedMethods = knownMethodSupport.get(clazz);
-      if (supportedMethods == null) {
-        knownMethodSupport.put(clazz, supportedMethods = new MethodSupport(clazz));
-      }
-    }
-    return supportedMethods;
-  }
-
-  /** @deprecated Remove this when old API is removed! */
-  private static final class TokenWrapperAttributeFactory extends AttributeFactory {
-    private final AttributeFactory delegate;
-
-    private TokenWrapperAttributeFactory(AttributeFactory delegate) {
-      this.delegate = delegate;
-    }
-
-    public AttributeImpl createAttributeInstance(Class attClass) {
-      return attClass.isAssignableFrom(TokenWrapper.class)
-        ? new TokenWrapper()
-        : delegate.createAttributeInstance(attClass);
-    }
-
-    // this is needed for TeeSinkTokenStream's check for compatibility of AttributeSource,
-    // so two TokenStreams using old API have the same AttributeFactory wrapped by this one.
-    public boolean equals(Object other) {
-      if (this == other) return true;
-      if (other instanceof TokenWrapperAttributeFactory) {
-        final TokenWrapperAttributeFactory af = (TokenWrapperAttributeFactory) other;
-        return this.delegate.equals(af.delegate);
-      }
-      return false;
-    }
-
-    public int hashCode() {
-      return delegate.hashCode() ^ 0x0a45ff31;
-    }
-  }
-
   /**
    * A TokenStream using the default attribute factory.
    */
   protected TokenStream() {
-    super(onlyUseNewAPI
-      ? AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY
-      : TokenStream.DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY
-    );
-    tokenWrapper = initTokenWrapper(null);
-    check();
+    super();
   }
 
   /**
@@ -182,105 +90,16 @@
    */
   protected TokenStream(AttributeSource input) {
     super(input);
-    tokenWrapper = initTokenWrapper(input);
-    check();
   }
 
   /**
    * A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances.
    */
   protected TokenStream(AttributeFactory factory) {
-    super(onlyUseNewAPI
-      ? factory
-      : new TokenWrapperAttributeFactory(factory)
-    );
-    tokenWrapper = initTokenWrapper(null);
-    check();
+    super(factory);
   }
-
-  /** @deprecated Remove this when old API is removed! */
-  private TokenWrapper initTokenWrapper(AttributeSource input) {
-    if (onlyUseNewAPI) {
-      // no wrapper needed
-      return null;
-    } else {
-      // if possible get the wrapper from the filter's input stream
-      if (input instanceof TokenStream && ((TokenStream) input).tokenWrapper != null) {
-        return ((TokenStream) input).tokenWrapper;
-      }
-      // check that all attributes are implemented by the same TokenWrapper instance
-      final Attribute att = addAttribute(TermAttribute.class);
-      if (att instanceof TokenWrapper &&
-        addAttribute(TypeAttribute.class) == att &&
-        addAttribute(PositionIncrementAttribute.class) == att &&
-        addAttribute(FlagsAttribute.class) == att &&
-        addAttribute(OffsetAttribute.class) == att &&
-        addAttribute(PayloadAttribute.class) == att
-      ) {
-        return (TokenWrapper) att;
-      } else {
-        throw new UnsupportedOperationException(
-          "If onlyUseNewAPI is disabled, all basic Attributes must be implemented by the internal class "+
-          "TokenWrapper. Please make sure, that all TokenStreams/TokenFilters in this chain have been "+
-          "instantiated with this flag disabled and do not add any custom instances for the basic Attributes!"
-        );
-      }
-    }
-  }
-
-  /** @deprecated Remove this when old API is removed! */
-  private void check() {
-    if (onlyUseNewAPI && !supportedMethods.hasIncrementToken) {
-      throw new UnsupportedOperationException(getClass().getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI.");
-    }
-
-    // a TokenStream subclass must at least implement one of the methods!
-    if (!(supportedMethods.hasIncrementToken || supportedMethods.hasNext || supportedMethods.hasReusableNext)) {
-      throw new UnsupportedOperationException(getClass().getName()+" does not implement any of incrementToken(), next(Token), next().");
-    }
-  }
 
   /**
-   * For extra performance you can globally enable the new
-   * {@link #incrementToken} API using {@link Attribute}s. There will be a
-   * small, but in most cases negligible performance increase by enabling this,
-   * but it only works if all TokenStreams use the new API and
-   * implement {@link #incrementToken}. This setting can only be enabled
-   * globally.
-   * <p>
-   * This setting only affects TokenStreams instantiated after this
-   * call. All TokenStreams already created use the other setting.
-   * <p>
-   * All core {@link Analyzer}s are compatible with this setting, if you have
-   * your own TokenStreams that are also compatible, you should enable
-   * this.
-   * <p>
-   * When enabled, tokenization may throw {@link UnsupportedOperationException}
-   * s, if the whole tokenizer chain is not compatible eg one of the
-   * TokenStreams does not implement the new TokenStream API.
-   * <p>
-   * The default is false, so there is the fallback to the old API
-   * available.
-   *
-   * @deprecated This setting will no longer be needed in Lucene 3.0 as the old
-   *             API will be removed.
-   */
-  public static void setOnlyUseNewAPI(boolean onlyUseNewAPI) {
-    TokenStream.onlyUseNewAPI = onlyUseNewAPI;
-  }
-
-  /**
-   * Returns if only the new API is used.
-   *
-   * @see #setOnlyUseNewAPI
-   * @deprecated This setting will no longer be needed in Lucene 3.0 as
-   *             the old API will be removed.
-   */
-  public static boolean getOnlyUseNewAPI() {
-    return onlyUseNewAPI;
-  }
-
-  /**
   * Consumers (ie {@link IndexWriter}) use this method to advance the stream to
   * the next token. Implementing classes must implement this method and update
   * the appropriate {@link AttributeImpl}s with the attributes of the next
   * token.
@@ -303,25 +122,8 @@
   * {@link #incrementToken()}.
   *
   * @return false for end of stream; true otherwise
-   *
-   * <p>
-   * Note that this method will be defined abstract in Lucene
-   * 3.0.
   */
-  public boolean incrementToken() throws IOException {
-    assert tokenWrapper != null;
-
-    final Token token;
-    if (supportedMethods.hasReusableNext) {
-      token = next(tokenWrapper.delegate);
-    } else {
-      assert supportedMethods.hasNext;
-      token = next();
-    }
-    if (token == null) return false;
-    tokenWrapper.delegate = token;
-    return true;
-  }
+  public abstract boolean incrementToken() throws IOException;
 
   /**
   * This method is called by the consumer after the last token has been
   * consumed, after {@link #incrementToken()} returned <code>false</code>
   * (using the new TokenStream API). Streams implementing the old API
   * should upgrade to use this feature.
   */
  public void end() throws IOException {
    // do nothing by default
  }
 
   /**
-   * Returns the next token in the stream, or null at EOS. When possible, the
-   * input Token should be used as the returned Token (this gives fastest
-   * tokenization performance), but this is not required and a new Token may be
-   * returned. Callers may re-use a single Token instance for successive calls
-   * to this method.
-   * <p>
-   * This implicitly defines a "contract" between consumers (callers of this
-   * method) and producers (implementations of this method that are the source
-   * for tokens):
-   * <ul>
-   * <li>A consumer must fully consume the previous {@link Token} prior to
-   * calling this method again.</li>
-   * <li>A producer must call {@link Token#clear()} before setting the fields in
-   * it and returning it</li>
-   * </ul>
-   * Also, the producer must make no assumptions about a {@link Token} after it
-   * has been returned: the caller may arbitrarily change it. If the producer
-   * needs to hold onto the {@link Token} for subsequent calls, it must clone()
-   * it before storing it. Note that a {@link TokenFilter} is considered a
-   * consumer.
-   *
-   * @param reusableToken a {@link Token} that may or may not be used to return;
-   *        this parameter should never be null (the callee is not required to
-   *        check for null before using it, but it is a good idea to assert that
-   *        it is not null.)
-   * @return next {@link Token} in the stream or null if end-of-stream was hit
-   * @deprecated The new {@link #incrementToken()} and {@link AttributeSource}
-   *             APIs should be used instead.
-   */
-  public Token next(final Token reusableToken) throws IOException {
-    assert reusableToken != null;
-
-    if (tokenWrapper == null)
-      throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");
-
-    if (supportedMethods.hasIncrementToken) {
-      tokenWrapper.delegate = reusableToken;
-      return incrementToken() ? tokenWrapper.delegate : null;
-    } else {
-      assert supportedMethods.hasNext;
-      return next();
-    }
-  }
-
-  /**
-   * Returns the next {@link Token} in the stream, or null at EOS.
-   *
-   * @deprecated The returned Token is a "full private copy" (not re-used across
-   *             calls to {@link #next()}) but will be slower than calling
-   *             {@link #next(Token)} or using the new {@link #incrementToken()}
-   *             method with the new {@link AttributeSource} API.
-   */
-  public Token next() throws IOException {
-    if (tokenWrapper == null)
-      throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");
-
-    final Token nextToken;
-    if (supportedMethods.hasIncrementToken) {
-      final Token savedDelegate = tokenWrapper.delegate;
-      tokenWrapper.delegate = new Token();
-      nextToken = incrementToken() ? tokenWrapper.delegate : null;
-      tokenWrapper.delegate = savedDelegate;
-    } else {
-      assert supportedMethods.hasReusableNext;
-      nextToken = next(new Token());
-    }
-
-    if (nextToken != null) {
-      Payload p = nextToken.getPayload();
-      if (p != null) {
-        nextToken.setPayload((Payload) p.clone());
-      }
-    }
-    return nextToken;
-  }
-
-  /**
   * Resets this stream to the beginning. This is an optional operation, so
   * subclasses may or may not implement this method. {@link #reset()} is not needed for
   * the standard indexing process. However, if the tokens of a
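
With the backwards compatibility layer gone, incrementToken() is the only way to
consume a stream. A minimal consumer sketch (analyzer, field name, and text are
illustrative); attributes are fetched once and reused across all calls:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class ConsumerExample {
  public static void main(String[] args) throws IOException {
    TokenStream stream = new WhitespaceAnalyzer()
        .tokenStream("f", new StringReader("some text here"));
    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);

    // Each call overwrites the shared attributes with the next token's state.
    while (stream.incrementToken()) {
      System.out.println(termAtt.term()
          + " [" + offsetAtt.startOffset() + "," + offsetAtt.endOffset() + ")");
    }
    stream.end();   // records end-of-stream state, eg the final offset
    stream.close();
  }
}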
Index: src/java/org/apache/lucene/analysis/TokenWrapper.java
===================================================================
--- src/java/org/apache/lucene/analysis/TokenWrapper.java	(revision 821446)
+++ src/java/org/apache/lucene/analysis/TokenWrapper.java	(working copy)
@@ -1,166 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.index.Payload;
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- * This class wraps a Token and supplies a single attribute instance
- * where the delegate token can be replaced.
- * @deprecated Will be removed, when old TokenStream API is removed.
- */
-final class TokenWrapper extends AttributeImpl
-  implements Cloneable, TermAttribute, TypeAttribute, PositionIncrementAttribute,
-             FlagsAttribute, OffsetAttribute, PayloadAttribute {
-
-  Token delegate;
-
-  TokenWrapper() {
-    this(new Token());
-  }
-
-  TokenWrapper(Token delegate) {
-    this.delegate = delegate;
-  }
-
-  // TermAttribute:
-
-  public String term() {
-    return delegate.term();
-  }
-
-  public void setTermBuffer(char[] buffer, int offset, int length) {
-    delegate.setTermBuffer(buffer, offset, length);
-  }
-
-  public void setTermBuffer(String buffer) {
-    delegate.setTermBuffer(buffer);
-  }
-
-  public void setTermBuffer(String buffer, int offset, int length) {
-    delegate.setTermBuffer(buffer, offset, length);
-  }
-
-  public char[] termBuffer() {
-    return delegate.termBuffer();
-  }
-
-  public char[] resizeTermBuffer(int newSize) {
-    return delegate.resizeTermBuffer(newSize);
-  }
-
-  public int termLength() {
-    return delegate.termLength();
-  }
-
-  public void setTermLength(int length) {
-    delegate.setTermLength(length);
-  }
-
-  // TypeAttribute:
-
-  public String type() {
-    return delegate.type();
-  }
-
-  public void setType(String type) {
-    delegate.setType(type);
-  }
-
-  public void setPositionIncrement(int positionIncrement) {
-    delegate.setPositionIncrement(positionIncrement);
-  }
-
-  public int getPositionIncrement() {
-    return delegate.getPositionIncrement();
-  }
-
-  // FlagsAttribute
-
-  public int getFlags() {
-    return delegate.getFlags();
-  }
-
-  public void setFlags(int flags) {
-    delegate.setFlags(flags);
-  }
-
-  // OffsetAttribute
-
-  public int startOffset() {
-    return delegate.startOffset();
-  }
-
-  public void setOffset(int startOffset, int endOffset) {
-    delegate.setOffset(startOffset, endOffset);
-  }
-
-  public int endOffset() {
-    return delegate.endOffset();
-  }
-
-  // PayloadAttribute
-
-  public Payload getPayload() {
-    return delegate.getPayload();
-  }
-
-  public void setPayload(Payload payload) {
-    delegate.setPayload(payload);
-  }
-
-  // AttributeImpl
-
-  public void clear() {
-    delegate.clear();
-  }
-
-  public String toString() {
-    return delegate.toString();
-  }
-
-  public int hashCode() {
-    return delegate.hashCode();
-  }
-
-  public boolean equals(Object other) {
-    if (other instanceof TokenWrapper) {
-      return ((TokenWrapper) other).delegate.equals(this.delegate);
-    }
-    return false;
-  }
-
-  public Object clone() {
-    return new TokenWrapper((Token) delegate.clone());
-  }
-
-  public void copyTo(AttributeImpl target) {
-    if (target instanceof TokenWrapper) {
-      ((TokenWrapper) target).delegate = (Token) this.delegate.clone();
-    } else {
-      this.delegate.copyTo(target);
-    }
-  }
-}
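
Since incrementToken() is now abstract, every TokenStream subclass must implement
it directly; there is no reflection-based fallback. A hypothetical filter written
against the new API only (SimpleLowerCaseFilter is illustrative, not a class in
this patch):

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public final class SimpleLowerCaseFilter extends TokenFilter {
  private final TermAttribute termAtt;

  public SimpleLowerCaseFilter(TokenStream input) {
    super(input);
    termAtt = addAttribute(TermAttribute.class);
  }

  public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false; // end of stream: no attribute state to update
    }
    // Mutate the shared TermAttribute in place; no Token objects are allocated.
    final char[] buffer = termAtt.termBuffer();
    final int length = termAtt.termLength();
    for (int i = 0; i < length; i++) {
      final char c = buffer[i];
      if (c >= 'A' && c <= 'Z') {
        buffer[i] = (char) (c + ('a' - 'A'));
      }
    }
    return true;
  }
}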