Index: lucene/test-framework/src/test/org/apache/lucene/analysis/TestPosition.java
===================================================================
--- lucene/test-framework/src/test/org/apache/lucene/analysis/TestPosition.java (revision 0)
+++ lucene/test-framework/src/test/org/apache/lucene/analysis/TestPosition.java (working copy)
@@ -0,0 +1,34 @@
+package org.apache.lucene.analysis;
+
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Trivial position class: a lookahead position that carries a single String "fact".
+ */
+public class TestPosition extends LookaheadTokenFilter.Position {
+  private String fact;
+
+  public String getFact() {
+    return fact;
+  }
+
+  public void setFact(String fact) {
+    this.fact = fact;
+  }
+}

Property changes on: lucene/test-framework/src/test/org/apache/lucene/analysis/TestPosition.java
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/test-framework/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java
===================================================================
--- lucene/test-framework/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java (revision 0)
+++ lucene/test-framework/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java (working copy)
@@ -0,0 +1,89 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Simple example of a filter that seems to show some problems with LookaheadTokenFilter.
+ */
+public final class TrivialLookaheadFilter extends LookaheadTokenFilter<TestPosition> {
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  protected TrivialLookaheadFilter(TokenStream input) {
+    super(input);
+  }
+
+  @Override
+  protected TestPosition newPosition() {
+    return new TestPosition();
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    // At the outset, getMaxPos is -1. So we'll peek. When we reach the end of the sentence and go to the
+    // first token of the next sentence, maxPos will be the prev sentence's end token, and we'll go again.
+    if (positions.getMaxPos() < outputPos) {
+      peekSentence();
+    }
+
+    // afterPosition is called too late to simply use the results of the lookahead
+    // to fix something up, so the term is rewritten here from the stored fact instead.
+    boolean returningSomething = nextToken();
+    if (returningSomething) {
+      termAtt.setEmpty();
+      termAtt.append(positions.get(outputPos).getFact());
+    }
+    return returningSomething;
+  }
+
+  @Override
+  protected void afterPosition() throws IOException {
+    // nothing to do in here.
+  }
+
+  private void peekSentence() throws IOException {
+    List<String> facts = new ArrayList<String>();
+    boolean haveSentence = false;
+    do {
+      if (peekToken()) {
+
+        String term = new String(termAtt.buffer(), 0, termAtt.length());
+        facts.add(term + "-huh?");
+        if (".".equals(term)) {
+          haveSentence = true;
+        }
+
+      } else {
+        haveSentence = true;
+      }
+
+    } while (!haveSentence);
+
+    // attach the (now disambiguated) analyzed tokens to the positions.
+    for (int x = 0; x < facts.size(); x++) {
+      // facts is indexed relative to the sentence start (outputPos); positions is absolute.
+      positions.get(outputPos + x).setFact(facts.get(x));
+    }
+  }
+}

Property changes on: lucene/test-framework/src/test/org/apache/lucene/analysis/TrivialLookaheadFilter.java
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/test-framework/src/test/org/apache/lucene/analysis/TestLookaheadTokenFilter.java
===================================================================
--- lucene/test-framework/src/test/org/apache/lucene/analysis/TestLookaheadTokenFilter.java (revision 0)
+++ lucene/test-framework/src/test/org/apache/lucene/analysis/TestLookaheadTokenFilter.java (working copy)
@@ -0,0 +1,75 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.Reader;
+
+public class TestLookaheadTokenFilter extends BaseTokenStreamTestCase {
+
+  // This case passes; it resulted from trying to manipulate the contents of
+  // a token itself in the afterPosition method, which I realize isn't valid.
+  public void testMissedFirstToken() throws Exception {
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName,
+                                                       Reader reader) {
+        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        TrivialLookaheadFilter filter = new TrivialLookaheadFilter(source);
+        return new TokenStreamComponents(source, filter);
+      }
+    };
+    assertAnalyzesTo(analyzer,
+        "Only he who is running knows .",
+        new String[]{
+            "Only-huh?",
+            "he-huh?",
+            "who-huh?",
+            "is-huh?",
+            "running-huh?",
+            "knows-huh?",
+            ".-huh?"
+        });
+  }
+
+  // This fails, since a token gets consumed in the middle of nextToken.
+  public void testSkipViaPeekInNextToken() throws Exception {
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName,
+                                                       Reader reader) {
+        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        TrivialLookaheadFilter filter = new TrivialLookaheadFilter(source);
+        return new TokenStreamComponents(source, filter);
+      }
+    };
+    assertAnalyzesTo(analyzer,
+        "Only he who is running knows . Lookout .",
+        new String[]{
+            "Only-huh?",
+            "he-huh?",
+            "who-huh?",
+            "is-huh?",
+            "running-huh?",
+            "knows-huh?",
+            ".-huh?",
+            "Lookout-huh?",
+            ".-huh?"
+        });
+  }
+}

Property changes on: lucene/test-framework/src/test/org/apache/lucene/analysis/TestLookaheadTokenFilter.java
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property