Index: lucene/core/src/test/org/apache/lucene/analysis/TestPosition.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/analysis/TestPosition.java (revision 0)
+++ lucene/core/src/test/org/apache/lucene/analysis/TestPosition.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis;
+
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.Ignore;
+
+/** Trivial {@link LookaheadTokenFilter.Position} subclass that carries one String "fact" per position,
+ * readable and writable through {@link #getFact()} and {@link #setFact(String)}.
+ * NOTE(review): presumably {@code @Ignore} keeps test runners from treating this Test*-named helper as a test case; confirm. */
+@Ignore
+public class TestPosition extends LookaheadTokenFilter.Position {
+  private String fact;
+
+  public String getFact() {
+    return fact;
+  }
+
+  public void setFact(String fact) {
+    this.fact = fact;
+  }
+}

Property changes on: lucene/core/src/test/org/apache/lucene/analysis/TestPosition.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/core/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java (revision 0)
+++ lucene/core/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java (working copy)
@@ -0,0 +1,104 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/** Test-only {@link LookaheadTokenFilter} that peeks ahead one sentence at a time (a sentence ends at a "."
+ * token or at end of input) and, after each position, inserts a token with position increment 0 whose term
+ * is the original term plus "-huh?". It seems to show some problems with LookaheadTokenFilter. */
+final public class TrivialLookaheadFilter extends LookaheadTokenFilter<TestPosition> {
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  private int insertUpto; // last output position for which a token was already inserted; -1 after reset()
+
+  protected TrivialLookaheadFilter(TokenStream input) {
+    super(input);
+  }
+
+  @Override
+  protected TestPosition newPosition() {
+    return new TestPosition();
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    // At the outset getMaxPos() is -1, so we peek the first sentence. When output reaches the first token
+    // of the next sentence, maxPos is still the previous sentence's end token, so we peek again.
+    if (positions.getMaxPos() < outputPos) {
+      peekSentence();
+    }
+
+    return nextToken();
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    insertUpto = -1;
+  }
+
+  @Override
+  protected void afterPosition() throws IOException {
+    if (insertUpto < outputPos) { // insert at most one token per output position
+      insertToken();
+      // Overwrite the inserted token: same position (posInc 0), the stored fact as its term.
+      clearAttributes();
+      termAtt.setEmpty();
+      posIncAtt.setPositionIncrement(0);
+      termAtt.append(positions.get(outputPos).getFact());
+      offsetAtt.setOffset(positions.get(outputPos).startOffset,
+                          positions.get(outputPos+1).endOffset);
+      insertUpto = outputPos;
+    }
+  }
+
+  private void peekSentence() throws IOException {
+    List<String> facts = new ArrayList<String>();
+    boolean haveSentence = false;
+    do {
+      if (peekToken()) {
+
+        String term = new String(termAtt.buffer(), 0, termAtt.length());
+        facts.add(term + "-huh?");
+        if (".".equals(term)) {
+          haveSentence = true;
+        }
+
+      } else {
+        haveSentence = true;
+      }
+
+    } while (!haveSentence);
+
+    // Attach each collected fact to the buffered position of the token it was derived from.
+    for (int x = 0; x < facts.size(); x++) {
+      // x is relative to the sentence start; positions is absolute, hence the outputPos offset.
+      positions.get(outputPos + x).setFact(facts.get(x));
+    }
+  }
+}

Property changes on: lucene/core/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/core/src/test/org/apache/lucene/analysis/TestLookaheadTokenFilter.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/analysis/TestLookaheadTokenFilter.java (revision 1520806)
+++ lucene/core/src/test/org/apache/lucene/analysis/TestLookaheadTokenFilter.java (working copy)
@@ -65,4 +65,35 @@
     };
     checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 8192);
   }
+
+  public void testMissedFirstToken() throws Exception {
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName,
+                                                       Reader reader) {
+        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        TrivialLookaheadFilter filter = new TrivialLookaheadFilter(source);
+        return new TokenStreamComponents(source, filter);
+      }
+    };
+
+    assertAnalyzesTo(analyzer,
+        "Only he who is running knows .",
+        new String[]{
+            "Only",
+            "Only-huh?",
+            "he",
+            "he-huh?",
+            "who",
+            "who-huh?",
+            "is",
+            "is-huh?",
+            "running",
+            "running-huh?",
+            "knows",
+            "knows-huh?",
+            ".",
+            ".-huh?"
+        });
+  }
 }
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java (revision 1520806)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java (working copy)
@@ -106,7 +106,7 @@
 
   /** This is called when all input tokens leaving a given
    * position have been returned. Override this and
-   * call createToken and then set whichever token's
+   * call insertToken and then set whichever token's
    * attributes you want, if you want to inject
    * a token starting from this position. */
   protected void afterPosition() throws IOException {
@@ -222,6 +222,18 @@
           if (DEBUG) {
             System.out.println(" END");
           }
+          afterPosition();
+          if (insertPending) {
+            // Subclass inserted a token at this same
+            // position:
+            if (DEBUG) {
+              System.out.println(" return inserted token");
+            }
+            assert insertedTokenConsistent();
+            insertPending = false;
+            return true;
+          }
+
           return false;
         }
       } else {
@@ -260,7 +272,7 @@
     final int posLen = posLenAtt.getPositionLength();
     final Position endPosData = positions.get(outputPos + posLen);
     assert endPosData.endOffset != -1;
-    assert offsetAtt.endOffset() == endPosData.endOffset;
+    assert offsetAtt.endOffset() == endPosData.endOffset: "offsetAtt.endOffset=" + offsetAtt.endOffset() + " vs expected=" + endPosData.endOffset;
     return true;
   }
 