Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 940593) +++ lucene/CHANGES.txt (working copy) @@ -8,6 +8,7 @@ - o.a.l.analysis.PorterStemFilter -> o.a.l.analysis.en.PorterStemFilter - o.a.l.analysis.ASCIIFoldingFilter -> o.a.l.analysis.miscellaneous.ASCIIFoldingFilter - o.a.l.analysis.ISOLatin1AccentFilter -> o.a.l.analysis.miscellaneous.ISOLatin1AccentFilter + - o.a.l.analysis.TeeSinkTokenFilter -> o.a.l.analysis.sinks.TeeSinkTokenFilter ... (in progress) * LUCENE-1458, LUCENE-2111, LUCENE-2354: Changes from flexible indexing: Index: lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java (deleted) =================================================================== Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 940593) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -39,7 +39,6 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.TeeSinkTokenFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; @@ -4182,32 +4181,6 @@ r.close(); dir.close(); } - - // LUCENE-1448 - public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception { - MockRAMDirectory dir = new MockRAMDirectory(); - Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); - Document doc = new Document(); - TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd "))); - TokenStream sink = tee.newSinkTokenStream(); - Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS); - Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f1); - doc.add(f2); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - assertEquals(2, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(8, termOffsets[1].getStartOffset()); - assertEquals(12, termOffsets[1].getEndOffset()); - r.close(); - dir.close(); - } // LUCENE-1448 public void testEndOffsetPositionStopFilter() throws Exception { Index: lucene/src/java/org/apache/lucene/analysis/TeeSinkTokenFilter.java (deleted) =================================================================== Index: lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java =================================================================== --- lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (revision 940593) +++ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (working copy) @@ -20,11 +20,9 @@ import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.TeeSinkTokenFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; @@ -39,7 +37,7 @@ String test = "The quick red fox jumped over the lazy brown dogs"; TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)))); - SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter); + TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter); boolean seenDogs = false; Index: lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java =================================================================== --- lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (revision 940593) +++ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (working copy) @@ -22,9 +22,7 @@ import java.util.Locale; import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.TeeSinkTokenFilter; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream; public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase { @@ -37,7 +35,7 @@ DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US)); String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006"; TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))); - SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter); + TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter); int count = 0; tee.reset(); Index: lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (revision 940593) +++ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.analysis; +package org.apache.lucene.analysis.sinks; /** * Copyright 2004 The Apache Software Foundation @@ -16,10 +16,26 @@ * limitations under the License. */ +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.TermPositionVector; +import org.apache.lucene.index.TermVectorOffsetInfo; +import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.English; import java.io.IOException; @@ -72,6 +88,33 @@ } }; + // LUCENE-1448 + // TODO: instead of testing it this way, we can test + // with BaseTokenStreamTestCase now... + public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception { + MockRAMDirectory dir = new MockRAMDirectory(); + Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd "))); + TokenStream sink = tee.newSinkTokenStream(); + Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f1); + doc.add(f2); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); + assertEquals(2, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + assertEquals(8, termOffsets[1].getStartOffset()); + assertEquals(12, termOffsets[1].getEndOffset()); + r.close(); + dir.close(); + } public void testGeneral() throws IOException { final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer1.toString()))); Index: lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java =================================================================== --- lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (revision 940593) +++ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (working copy) @@ -20,9 +20,7 @@ import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.TeeSinkTokenFilter; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream; public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase { @@ -35,7 +33,7 @@ TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4); String test = "The quick red fox jumped over the lazy brown dogs"; TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))); - SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter); + TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter); int count = 0; tee.reset(); Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java (revision 940593) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java (working copy) @@ -21,7 +21,6 @@ import java.text.ParseException; import java.util.Date; -import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkFilter; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.AttributeSource; @@ -31,7 +30,7 @@ *

* **/ -public class DateRecognizerSinkFilter extends SinkFilter { +public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter { public static final String DATE_TYPE = "date"; protected DateFormat dateFormat; Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java (revision 940593) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java (working copy) @@ -19,14 +19,13 @@ import java.io.IOException; -import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkFilter; import org.apache.lucene.util.AttributeSource; /** * Counts the tokens as they go by and saves to the internal list those between the range of lower and upper, exclusive of upper * **/ -public class TokenRangeSinkFilter extends SinkFilter { +public class TokenRangeSinkFilter extends TeeSinkTokenFilter.SinkFilter { private int lower; private int upper; private int count; Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java (revision 940593) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.analysis; +package org.apache.lucene.analysis.sinks; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -23,6 +23,8 @@ import java.util.LinkedList; import java.util.List; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java (revision 940593) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java (working copy) @@ -17,11 +17,10 @@ * limitations under the License. */ -import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkFilter; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; -public class TokenTypeSinkFilter extends SinkFilter { +public class TokenTypeSinkFilter extends TeeSinkTokenFilter.SinkFilter { private String typeToMatch; private TypeAttribute typeAtt; Index: lucene/backwards/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java (deleted) =================================================================== Index: lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 940593) +++ lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -36,7 +36,6 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.TeeSinkTokenFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; @@ -4188,32 +4187,6 @@ r.close(); dir.close(); } - - // LUCENE-1448 - public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception { - MockRAMDirectory dir = new MockRAMDirectory(); - Analyzer analyzer = new WhitespaceAnalyzer(); - IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED); - Document doc = new Document(); - TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd "))); - TokenStream sink = tee.newSinkTokenStream(); - Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS); - Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f1); - doc.add(f2); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - assertEquals(2, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(8, termOffsets[1].getStartOffset()); - assertEquals(12, termOffsets[1].getEndOffset()); - r.close(); - dir.close(); - } // LUCENE-1448 public void testEndOffsetPositionStopFilter() throws Exception { Index: lucene/backwards/src/java/org/apache/lucene/analysis/TeeSinkTokenFilter.java (deleted) ===================================================================