Index: lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java (revision 1514517) +++ lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java (working copy) @@ -31,16 +31,19 @@ */ public final class BinaryTokenStream extends TokenStream { private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class); + private final BytesRef bytes; private boolean available = true; public BinaryTokenStream(BytesRef bytes) { - bytesAtt.setBytesRef(bytes); + this.bytes = bytes; } @Override public boolean incrementToken() { if (available) { + clearAttributes(); available = false; + bytesAtt.setBytesRef(bytes); return true; } return false; Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1514517) +++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -50,6 +50,7 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; @@ -72,6 +73,9 @@ import org.apache.lucene.util.SetOnce; import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.BasicAutomata; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.packed.PackedInts; import org.junit.Test; @@ -1900,6 +1904,65 @@ } } + // LUCENE-3849 + public void testStopwordsPosIncHole() throws Exception { + Directory dir = 
newDirectory(); + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader); + TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET); + return new TokenStreamComponents(tokenizer, stream); + } + }; + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a); + Document doc = new Document(); + doc.add(new TextField("body", "just a", Field.Store.NO)); + doc.add(new TextField("body", "test of gaps", Field.Store.NO)); + iw.addDocument(doc); + IndexReader ir = iw.getReader(); + iw.close(); + IndexSearcher is = newSearcher(ir); + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term("body", "just"), 0); + pq.add(new Term("body", "test"), 2); + // body:"just ? test" + assertEquals(1, is.search(pq, 5).totalHits); + ir.close(); + dir.close(); + } + + // LUCENE-3849 + public void testStopwordsPosIncHole2() throws Exception { + // use two stopfilters for testing here + Directory dir = newDirectory(); + final Automaton secondSet = BasicAutomata.makeString("foobar"); + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader); + TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET); + stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet)); + return new TokenStreamComponents(tokenizer, stream); + } + }; + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a); + Document doc = new Document(); + doc.add(new TextField("body", "just a foobar", Field.Store.NO)); + doc.add(new TextField("body", "test of gaps", Field.Store.NO)); + iw.addDocument(doc); + IndexReader ir = iw.getReader(); + iw.close(); + IndexSearcher is = newSearcher(ir); + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term("body", "just"), 0); + pq.add(new Term("body", "test"), 3); + // 
body:"just ? ? test" + assertEquals(1, is.search(pq, 5).totalHits); + ir.close(); + dir.close(); + } + // here we do better, there is no current segments file, so we don't delete anything. // however, if you actually go and make a commit, the next time you run indexwriter // this file will be gone. Index: lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java =================================================================== --- lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (revision 1514517) +++ lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (working copy) @@ -21,6 +21,7 @@ import java.io.Closeable; import java.lang.reflect.Modifier; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; @@ -159,11 +160,18 @@ * setting the final offset of a stream. The final offset of a stream might * differ from the offset of the last token eg in case one or more whitespaces * followed after the last token, but a WhitespaceTokenizer was used. + * <p>
+ * Additionally any skipped positions (such as those removed by a stopfilter) + * can be applied to the position increment, or any adjustment of other + * attributes where the end-of-stream value may be important. * * @throws IOException If an I/O error occurs */ public void end() throws IOException { - // do nothing by default + clearAttributes(); // LUCENE-3849: don't consume dirty atts + if (hasAttribute(PositionIncrementAttribute.class)) { + getAttribute(PositionIncrementAttribute.class).setPositionIncrement(0); + } } /** Index: lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (revision 1514517) +++ lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) @@ -175,7 +175,10 @@ } // trigger streams to perform end-of-stream operations stream.end(); - + // TODO: maybe add some safety? then again, it's already checked + // when we come back around to the field... 
+ // nocommit + fieldState.position += posIncrAttribute.getPositionIncrement(); fieldState.offset += offsetAttribute.endOffset(); success2 = true; } finally { Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (revision 1514517) +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (working copy) @@ -264,7 +264,8 @@ } @Override - public final void end(){ + public final void end() throws IOException { + super.end(); offsetAtt.setOffset(getFinalOffset(),getFinalOffset()); } Index: lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java (revision 1514517) +++ lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java (working copy) @@ -49,7 +49,7 @@ // a hook for AssociationsDrillDownStream to add the associations payload to // the drill-down terms } - + @Override public final boolean incrementToken() throws IOException { if (current.length == 0) { Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (revision 1514517) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (working copy) @@ -555,12 +555,16 @@ private CharTermAttribute termAtt; private PositionIncrementAttribute posIncrAtt; private boolean returned; + private int val; + private final String word; + public SinglePositionTokenStream(String word) { termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = 
addAttribute(PositionIncrementAttribute.class); - termAtt.setEmpty().append(word); + this.word = word; returned = true; } + /** * Set the value we want to keep, as the position increment. * Note that when TermPositions.nextPosition() is later used to @@ -574,15 +578,21 @@ * This change is described in Lucene's JIRA: LUCENE-1542. */ public void set(int val) { - posIncrAtt.setPositionIncrement(val); + this.val = val; returned = false; } + @Override public boolean incrementToken() throws IOException { if (returned) { return false; } - return returned = true; + clearAttributes(); + posIncrAtt.setPositionIncrement(val); + termAtt.setEmpty(); + termAtt.append(word); + returned = true; + return true; } } Index: lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (revision 1514517) +++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (working copy) @@ -255,16 +255,33 @@ assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1); } } + if (ts.incrementToken()) { fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt.toString()); } + + // repeat our extra safety checks for end() + ts.clearAttributes(); + if (termAtt != null) termAtt.setEmpty().append("bogusTerm"); + if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243); + if (typeAtt != null) typeAtt.setType("bogusType"); + if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657); + if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653); + + checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before + ts.end(); + assertTrue("super.end()/clearAttributes() was not called correctly in end()", checkClearAtt.getAndResetClearCalled()); + if (finalOffset != null) { 
assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset()); } if (offsetAtt != null) { assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0); } + + // nocommit final pos inc too? + ts.close(); } Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (revision 1514517) +++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (working copy) @@ -244,6 +244,7 @@ @Override public void end() throws IOException { + super.end(); int finalOffset = correctOffset(off); offsetAtt.setOffset(finalOffset, finalOffset); // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false. Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (revision 1514517) +++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (working copy) @@ -58,7 +58,8 @@ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - + private int skippedPositions; + /** * Create a new MockTokenFilter. 
* @@ -76,7 +77,7 @@ // initial token with posInc=0 ever // return the first non-stop word found - int skippedPositions = 0; + skippedPositions = 0; while (input.incrementToken()) { if (!filter.run(termAtt.buffer(), 0, termAtt.length())) { posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); @@ -87,4 +88,16 @@ // reached EOS -- return false return false; } + + @Override + public void end() throws IOException { + super.end(); + posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); + } + + @Override + public void reset() throws IOException { + super.reset(); + skippedPositions = 0; + } } Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java (revision 1514517) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java (working copy) @@ -50,7 +50,6 @@ private final CharArraySet stopWords; private State endState; - private boolean ended; /** Sole constructor. 
*/ public SuggestStopFilter(TokenStream input, CharArraySet stopWords) { @@ -61,28 +60,24 @@ @Override public void reset() throws IOException { super.reset(); - ended = false; endState = null; } @Override public void end() throws IOException { - if (!ended) { + if (endState == null) { super.end(); } else { // NOTE: we already called .end() from our .next() when // the stream was complete, so we do not call // super.end() here - - if (endState != null) { - restoreState(endState); - } + restoreState(endState); } } @Override public boolean incrementToken() throws IOException { - if (ended) { + if (endState != null) { return false; } @@ -101,8 +96,9 @@ // It was a stopword; skip it skippedPositions += posInc; } else { + clearAttributes(); input.end(); - ended = true; + endState = captureState(); int finalEndOffset = offsetAtt.endOffset(); assert finalEndOffset >= endOffset; if (finalEndOffset > endOffset) { @@ -112,7 +108,6 @@ } else { // No token separator after final token that // looked like a stop-word; don't filter it: - endState = captureState(); restoreState(sav); posIncAtt.setPositionIncrement(skippedPositions + posIncAtt.getPositionIncrement()); keywordAtt.setKeyword(true); Index: lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java =================================================================== --- lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (revision 1514517) +++ lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (working copy) @@ -120,7 +120,8 @@ } @Override - public void end() { + public void end() throws IOException { + super.end(); final int finalOffset = (length < 0) ? 
offset : offset + length; offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset)); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (working copy) @@ -80,7 +80,8 @@ } @Override - public final void end() { + public final void end() throws IOException { + super.end(); // set final offset offsetAtt.setOffset(finalOffset, finalOffset); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (working copy) @@ -309,7 +309,8 @@ } @Override - public void end() { + public void end() throws IOException { + super.end(); // set final offset final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength()); this.offsetAtt.setOffset(finalOffset, finalOffset); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (working copy) @@ -176,7 +176,8 @@ } @Override - public final void end() { + public final void end() throws IOException { + super.end(); // set final offset offsetAtt.setOffset(finalOffset, finalOffset); } Index: 
lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (working copy) @@ -191,7 +191,8 @@ } @Override - public final void end() { + public final void end() throws IOException { + super.end(); // set final offset int finalOffset = correctOffset(charsRead); offsetAtt.setOffset(finalOffset, finalOffset); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (working copy) @@ -76,6 +76,8 @@ "", "", }; + + private int skippedPositions; private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; @@ -123,7 +125,7 @@ @Override public final boolean incrementToken() throws IOException { clearAttributes(); - int posIncr = 1; + skippedPositions = 0; while(true) { int tokenType = scanner.getNextToken(); @@ -133,7 +135,7 @@ } if (scanner.yylength() <= maxTokenLength) { - posIncrAtt.setPositionIncrement(posIncr); + posIncrAtt.setPositionIncrement(skippedPositions+1); scanner.getText(termAtt); final int start = scanner.yychar(); offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length())); @@ -142,19 +144,23 @@ } else // When we skip a too-long term, we still increment the // position increment - posIncr++; + skippedPositions++; } } @Override - public final void end() { + public final void end() throws IOException { + super.end(); // set final offset int finalOffset = correctOffset(scanner.yychar() + 
scanner.yylength()); offsetAtt.setOffset(finalOffset, finalOffset); + // adjust any skipped tokens + posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions); } - + @Override public void reset() throws IOException { scanner.yyreset(input); + skippedPositions = 0; } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (working copy) @@ -76,6 +76,8 @@ "", "" }; + + private int skippedPositions; private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; @@ -130,7 +132,7 @@ @Override public final boolean incrementToken() throws IOException { clearAttributes(); - int posIncr = 1; + skippedPositions = 0; while(true) { int tokenType = scanner.getNextToken(); @@ -140,7 +142,7 @@ } if (scanner.yylength() <= maxTokenLength) { - posIncrAtt.setPositionIncrement(posIncr); + posIncrAtt.setPositionIncrement(skippedPositions+1); scanner.getText(termAtt); final int start = scanner.yychar(); offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length())); @@ -155,19 +157,23 @@ } else // When we skip a too-long term, we still increment the // position increment - posIncr++; + skippedPositions++; } } @Override - public final void end() { + public final void end() throws IOException { + super.end(); // set final offset int finalOffset = correctOffset(scanner.yychar() + scanner.yylength()); offsetAtt.setOffset(finalOffset, finalOffset); + // adjust any skipped tokens + posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions); } @Override public void reset() throws IOException { scanner.yyreset(input); + skippedPositions = 0; } } Index: 
lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -90,6 +90,8 @@ "", "" }; + + private int skippedPositions; private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; @@ -144,7 +146,7 @@ @Override public final boolean incrementToken() throws IOException { clearAttributes(); - int posIncr = 1; + skippedPositions = 0; while(true) { int tokenType = scanner.getNextToken(); @@ -154,7 +156,7 @@ } if (scanner.yylength() <= maxTokenLength) { - posIncrAtt.setPositionIncrement(posIncr); + posIncrAtt.setPositionIncrement(skippedPositions+1); scanner.getText(termAtt); final int start = scanner.yychar(); offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length())); @@ -163,19 +165,23 @@ } else // When we skip a too-long term, we still increment the // position increment - posIncr++; + skippedPositions++; } } @Override - public final void end() { + public final void end() throws IOException { + super.end(); // set final offset int finalOffset = correctOffset(scanner.yychar() + scanner.yylength()); offsetAtt.setOffset(finalOffset, finalOffset); + // adjust any skipped tokens + posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions); } @Override public void reset() throws IOException { scanner.yyreset(input); + skippedPositions = 0; } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java 
(working copy) @@ -217,13 +217,15 @@ } @Override - public final void end() { + public final void end() throws IOException { + super.end(); assert bufferStart <= bufferEnd; int endOffset = offset; for (int i = bufferStart; i < bufferEnd; ++i) { endOffset += Character.charCount(buffer[i]); } endOffset = correctOffset(endOffset); + // set final offset offsetAtt.setOffset(endOffset, endOffset); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (working copy) @@ -142,7 +142,8 @@ } @Override - public final void end() { + public final void end() throws IOException { + super.end(); // set final offset offsetAtt.setOffset(finalOffset, finalOffset); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java (working copy) @@ -34,6 +34,7 @@ protected final Version version; private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private int skippedPositions; /** * Create a new {@link FilteringTokenFilter}. 
@@ -50,7 +51,7 @@ @Override public final boolean incrementToken() throws IOException { - int skippedPositions = 0; + skippedPositions = 0; while (input.incrementToken()) { if (accept()) { if (skippedPositions != 0) { @@ -68,6 +69,12 @@ @Override public void reset() throws IOException { super.reset(); + skippedPositions = 0; } + @Override + public void end() throws IOException { + super.end(); + posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); + } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (revision 1514517) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (working copy) @@ -130,7 +130,8 @@ } @Override - public void end() { + public void end() throws IOException { + super.end(); final int ofs = correctOffset(str.length()); offsetAtt.setOffset(ofs, ofs); } Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (revision 1514517) +++ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (working copy) @@ -107,8 +107,8 @@ @Override public void end() throws IOException { + super.end(); offsetAttr.setOffset(finalOffset, finalOffset); - super.end(); } Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (revision 1514517) +++ 
lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (working copy) @@ -86,7 +86,7 @@ @Override public void end() throws IOException { + super.end(); offsetAttr.setOffset(finalOffset, finalOffset); - super.end(); } } Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (revision 1514517) +++ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (working copy) @@ -91,9 +91,4 @@ public void reset() throws IOException { iterator = null; } - - @Override - public void end() throws IOException { - iterator = null; - } } Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java =================================================================== --- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (revision 1514517) +++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (working copy) @@ -280,7 +280,8 @@ } @Override - public void end() { + public void end() throws IOException { + super.end(); // Set final offset int finalOffset = correctOffset(pos); offsetAtt.setOffset(finalOffset, finalOffset); Index: lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java =================================================================== --- lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (revision 1514517) +++ lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (working copy) @@ -112,7 +112,8 @@ } @Override - public void end() { + public void end() throws IOException { + super.end(); // set final offset final int finalOffset = correctOffset(tokenEnd); offsetAtt.setOffset(finalOffset, finalOffset); 
Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java (revision 1514517) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java (working copy) @@ -114,7 +114,6 @@ public PositionsTokenStream() { term = addAttribute(CharTermAttribute.class); - term.append(DOC_POSITIONS_TERM); payload = addAttribute(PayloadAttribute.class); offset = addAttribute(OffsetAttribute.class); } @@ -125,6 +124,8 @@ return false; } + clearAttributes(); + term.append(DOC_POSITIONS_TERM); payload.setPayload(new BytesRef(Integer.toString(pos))); offset.setOffset(off, off); --pos;