? analyzer.patch
? build
? dist
? src/java/org/apache/lucene/analysis/AbstractTokenFilter.java
? src/java/org/apache/lucene/analysis/AbstractTokenizer.java
? src/java/org/apache/lucene/analysis/BaseAnalyzer.java
? src/test/org/apache/lucene/analysis/TestBaseAnalyzer.java
Index: src/java/org/apache/lucene/analysis/CharTokenizer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java,v
retrieving revision 1.5
diff -u -r1.5 CharTokenizer.java
--- src/java/org/apache/lucene/analysis/CharTokenizer.java	29 Mar 2004 22:48:00 -0000	1.5
+++ src/java/org/apache/lucene/analysis/CharTokenizer.java	3 Apr 2004 21:36:10 -0000
@@ -19,10 +19,7 @@
 import java.io.Reader;
 
 /** An abstract base class for simple, character-oriented tokenizers.*/
-public abstract class CharTokenizer extends Tokenizer {
-  public CharTokenizer(Reader input) {
-    super(input);
-  }
+public abstract class CharTokenizer extends AbstractTokenizer implements Tokenizer {
 
   private int offset = 0, bufferIndex = 0, dataLen = 0;
   private static final int MAX_WORD_LEN = 255;
@@ -36,6 +33,14 @@
    * define token boundaries and are not included in tokens. */
   protected abstract boolean isTokenChar(char c);
 
+  public CharTokenizer() {
+
+  }
+
+  public CharTokenizer(Reader input) {
+    setReader(input);
+  }
+
   /** Called on each token character to normalize it before it is added to the
    * token.  The default implementation does nothing.  Subclasses may use this
    * to, e.g., lowercase tokens. */
@@ -52,7 +57,7 @@
       offset++;
 
       if (bufferIndex >= dataLen) {
-        dataLen = input.read(ioBuffer);
+        dataLen = reader.read(ioBuffer);
         bufferIndex = 0;
       }
       ;
Index: src/java/org/apache/lucene/analysis/LetterTokenizer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/LetterTokenizer.java,v
retrieving revision 1.4
diff -u -r1.4 LetterTokenizer.java
--- src/java/org/apache/lucene/analysis/LetterTokenizer.java	29 Mar 2004 22:48:00 -0000	1.4
+++ src/java/org/apache/lucene/analysis/LetterTokenizer.java	3 Apr 2004 21:36:10 -0000
@@ -26,6 +26,11 @@
   job for some Asian languages, where words are not separated by spaces. */
 
 public class LetterTokenizer extends CharTokenizer {
+
+  public LetterTokenizer() {
+    super();
+  }
+
   /** Construct a new LetterTokenizer. */
   public LetterTokenizer(Reader in) {
     super(in);
Index: src/java/org/apache/lucene/analysis/LowerCaseFilter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java,v
retrieving revision 1.4
diff -u -r1.4 LowerCaseFilter.java
--- src/java/org/apache/lucene/analysis/LowerCaseFilter.java	29 Mar 2004 22:48:00 -0000	1.4
+++ src/java/org/apache/lucene/analysis/LowerCaseFilter.java	3 Apr 2004 21:36:10 -0000
@@ -23,9 +23,13 @@
  *
  * @version $Id: LowerCaseFilter.java,v 1.4 2004/03/29 22:48:00 cutting Exp $
  */
-public final class LowerCaseFilter extends TokenFilter {
+public final class LowerCaseFilter extends AbstractTokenFilter implements TokenFilter {
+  public LowerCaseFilter(){
+
+  }
+
   public LowerCaseFilter(TokenStream in) {
-    super(in);
+    setTokenStream(in);
   }
 
   public final Token next() throws IOException {
Index: src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java,v
retrieving revision 1.5
diff -u -r1.5 LowerCaseTokenizer.java
--- src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java	29 Mar 2004 22:48:00 -0000	1.5
+++ src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java	3 Apr 2004 21:36:10 -0000
@@ -29,6 +29,11 @@
  * job for some Asian languages, where words are not separated by spaces.
  */
 public final class LowerCaseTokenizer extends LetterTokenizer {
+
+  public LowerCaseTokenizer() {
+
+  }
+
   /** Construct a new LowerCaseTokenizer. */
   public LowerCaseTokenizer(Reader in) {
     super(in);
Index: src/java/org/apache/lucene/analysis/PorterStemFilter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java,v
retrieving revision 1.5
diff -u -r1.5 PorterStemFilter.java
--- src/java/org/apache/lucene/analysis/PorterStemFilter.java	29 Mar 2004 22:48:00 -0000	1.5
+++ src/java/org/apache/lucene/analysis/PorterStemFilter.java	3 Apr 2004 21:36:10 -0000
@@ -36,13 +36,16 @@
   }
 */
 
-public final class PorterStemFilter extends TokenFilter {
+public final class PorterStemFilter extends AbstractTokenFilter implements TokenFilter {
   private PorterStemmer stemmer;
 
-  public PorterStemFilter(TokenStream in) {
-    super(in);
+  public PorterStemFilter() {
     stemmer = new PorterStemmer();
   }
+
+  public PorterStemFilter(TokenStream in){
+    setTokenStream(in);
+  }
 
   /** Returns the next input Token, after being stemmed */
   public final Token next() throws IOException {
Index: src/java/org/apache/lucene/analysis/StopFilter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/StopFilter.java,v
retrieving revision 1.12
diff -u -r1.12 StopFilter.java
--- src/java/org/apache/lucene/analysis/StopFilter.java	29 Mar 2004 22:48:00 -0000	1.12
+++ src/java/org/apache/lucene/analysis/StopFilter.java	3 Apr 2004 21:36:10 -0000
@@ -25,42 +25,51 @@
  * Removes stop words from a token stream.
  */
 
-public final class StopFilter extends TokenFilter {
+public final class StopFilter extends AbstractTokenFilter implements TokenFilter {
 
   private Set stopWords;
 
+  public StopFilter(String[] stopWords) {
+    this(null, stopWords);
+  }
+
+  public StopFilter(Set stopWords) {
+    this(null, stopWords);
+  }
+
+
   /**
    * Constructs a filter which removes words from the input
    * TokenStream that are named in the array of words.
    */
   public StopFilter(TokenStream in, String[] stopWords) {
-    super(in);
-    this.stopWords = makeStopSet(stopWords);
+    setTokenStream(in);
+    this.stopWords = makeStopSet(stopWords);
   }
 
-  /**
-   * Constructs a filter which removes words from the input
-   * TokenStream that are named in the Hashtable.
-   *
-   * @deprecated Use {@link #StopFilter(TokenStream, Set)} instead
-   */
-  public StopFilter(TokenStream in, Hashtable stopTable) {
-    super(in);
-    stopWords = new HashSet(stopTable.keySet());
-  }
+  /**
+   * Constructs a filter which removes words from the input
+   * TokenStream that are named in the Hashtable.
+   *
+   * @deprecated Use {@link #StopFilter(TokenStream, Set)} instead
+   */
+  public StopFilter(TokenStream in, Hashtable stopTable) {
+    setTokenStream(in);
+    stopWords = new HashSet(stopTable.keySet());
+  }
 
-  /**
-   * Constructs a filter which removes words from the input
-   * TokenStream that are named in the Set.
-   * It is crucial that an efficient Set implementation is used
-   * for maximum performance.
-   *
-   * @see #makeStopSet(java.lang.String[])
-   */
-  public StopFilter(TokenStream in, Set stopWords) {
-    super(in);
-    this.stopWords = stopWords;
-  }
+  /**
+   * Constructs a filter which removes words from the input
+   * TokenStream that are named in the Set.
+   * It is crucial that an efficient Set implementation is used
+   * for maximum performance.
+   *
+   * @see #makeStopSet(java.lang.String[])
+   */
+  public StopFilter(TokenStream in, Set stopWords) {
+    setTokenStream(in);
+    this.stopWords = stopWords;
+  }
 
   /**
    * Builds a Hashtable from an array of stop words,
Index: src/java/org/apache/lucene/analysis/TokenFilter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/TokenFilter.java,v
retrieving revision 1.4
diff -u -r1.4 TokenFilter.java
--- src/java/org/apache/lucene/analysis/TokenFilter.java	29 Mar 2004 22:48:00 -0000	1.4
+++ src/java/org/apache/lucene/analysis/TokenFilter.java	3 Apr 2004 21:36:11 -0000
@@ -23,23 +23,11 @@
   This is an abstract class.
 */
-public abstract class TokenFilter extends TokenStream {
-  /** The source of tokens for this filter. */
-  protected TokenStream input;
-
-  /** Call TokenFilter(TokenStream) instead.
-   * @deprecated */
-  protected TokenFilter() {}
-
-  /** Construct a token stream filtering the given input. */
-  protected TokenFilter(TokenStream input) {
-    this.input = input;
-  }
-
-  /** Close the input TokenStream. */
-  public void close() throws IOException {
-    input.close();
-  }
+public interface TokenFilter extends TokenStream {
+  /**
+   * Set the previous Token Stream, if there was one
+   */
+  public void setTokenStream(TokenStream previousTokenStream);
 }
Index: src/java/org/apache/lucene/analysis/TokenStream.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/TokenStream.java,v
retrieving revision 1.3
diff -u -r1.3 TokenStream.java
--- src/java/org/apache/lucene/analysis/TokenStream.java	29 Mar 2004 22:48:00 -0000	1.3
+++ src/java/org/apache/lucene/analysis/TokenStream.java	3 Apr 2004 21:36:11 -0000
@@ -18,22 +18,34 @@
 import java.io.IOException;
 
-/** A TokenStream enumerates the sequence of tokens, either from
-  fields of a document or from query text.
-
-  This is an abstract class.  Concrete subclasses are:
-
 The returned token's type is set to an element of {@link
@@ -82,13 +107,6 @@
       jj_la1_0 = new int[] {0x10ff,};
    }
 
-  public StandardTokenizer(CharStream stream) {
-    token_source = new StandardTokenizerTokenManager(stream);
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
-  }
 
   public void ReInit(CharStream stream) {
     token_source.ReInit(stream);
@@ -98,13 +116,7 @@
     for (int i = 0; i < 1; i++) jj_la1[i] = -1;
   }
 
-  public StandardTokenizer(StandardTokenizerTokenManager tm) {
-    token_source = tm;
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
-  }
+
 
   public void ReInit(StandardTokenizerTokenManager tm) {
     token_source = tm;
Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v
retrieving revision 1.25
diff -u -r1.25 TestQueryParser.java
--- src/test/org/apache/lucene/queryParser/TestQueryParser.java	3 Mar 2004 12:07:13 -0000	1.25
+++ src/test/org/apache/lucene/queryParser/TestQueryParser.java	3 Apr 2004 21:36:25 -0000
@@ -17,13 +17,8 @@
  */
 
 import junit.framework.TestCase;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.DateField;
 import org.apache.lucene.search.BooleanQuery;
@@ -46,13 +41,13 @@
 
   public static Analyzer qpAnalyzer = new QPTestAnalyzer();
 
-  public static class QPTestFilter extends TokenFilter {
+  public static class QPTestFilter extends AbstractTokenFilter {
     /**
      * Filter which discards the token 'stop' and which expands the
      * token 'phrase' into 'phrase1 phrase2'
     */
    public QPTestFilter(TokenStream in) {
-      super(in);
+      setTokenStream(in);
    }
 
    boolean inPhrase = false;
Index: src/test/org/apache/lucene/search/TestPositionIncrement.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java,v
retrieving revision 1.4
diff -u -r1.4 TestPositionIncrement.java
--- src/test/org/apache/lucene/search/TestPositionIncrement.java	29 Mar 2004 22:48:06 -0000	1.4
+++ src/test/org/apache/lucene/search/TestPositionIncrement.java	3 Apr 2004 21:36:26 -0000
@@ -28,7 +28,7 @@
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-
+import org.apache.lucene.analysis.AbstractTokenFilter;
 import java.io.Reader;
 import java.io.IOException;
 import java.io.StringReader;
@@ -50,6 +50,10 @@
       private final String[] TOKENS = {"1", "2", "3", "4", "5"};
       private final int[] INCREMENTS = {1, 2, 1, 0, 1};
       private int i = 0;
+
+      public void close() throws IOException {
+
+      }
 
       public Token next() throws IOException {
         if (i == TOKENS.length)
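Usage sketch (not part of the diff above): the following shows how an analysis chain could be wired up against the setter-based API this patch proposes. setTokenStream(TokenStream) is public per the new TokenFilter interface and StopFilter(String[]) comes from the StopFilter hunk; a public setReader(Reader) on the tokenizer side is an assumption, since the new AbstractTokenizer/AbstractTokenFilter sources are listed as new files but are not included in this patch.

// Sketch only -- assumes AbstractTokenizer exposes a public setReader(Reader),
// which the rewritten CharTokenizer(Reader) constructor implies but this patch
// does not show.  setTokenStream(...) is declared public in the new TokenFilter
// interface, and StopFilter(String[]) is added by the StopFilter hunk above.
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

public class SetterStyleChainExample {

  // Construct the components with the new no-arg constructors, then bind
  // the input afterwards, as the patched constructors do internally.
  static TokenStream buildChain(Reader reader) {
    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(); // no-arg ctor from the patch
    tokenizer.setReader(reader);                             // assumed AbstractTokenizer setter

    StopFilter stop = new StopFilter(new String[] {"the", "a", "an"});
    stop.setTokenStream(tokenizer);                          // TokenFilter interface method
    return stop;
  }

  public static void main(String[] args) throws Exception {
    TokenStream ts = buildChain(new StringReader("The quick brown fox"));
    for (Token t = ts.next(); t != null; t = ts.next()) {
      System.out.println(t.termText());
    }
    ts.close();
  }
}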