Index: contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixableAndSuffixableTokenFilter.java
===================================================================
--- contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixableAndSuffixableTokenFilter.java	(revision 0)
+++ contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixableAndSuffixableTokenFilter.java	(revision 0)
@@ -0,0 +1,55 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Verifies that PrefixableAndSuffixableTokenFilter emits the prefix token,
+ * then the wrapped stream's tokens, then the suffix token, preserving offsets.
+ */
+public class TestPrefixableAndSuffixableTokenFilter extends TestCase {
+
+  public void test() throws IOException {
+
+    PrefixableAndSuffixableTokenFilter ts = new PrefixableAndSuffixableTokenFilter(
+        new SingleTokenTokenStream(new Token("^", 0, 0)), new WhitespaceTokenizer(new StringReader("hello world")),
+        new SingleTokenTokenStream(new Token("$", 0, 0))
+    );
+
+    assertNext(ts, "^", 0, 0);
+    assertNext(ts, "hello", 0, 5);
+    assertNext(ts, "world", 6, 11);
+    assertNext(ts, "$", 11, 11);
+    assertNull(ts.next());
+  }
+
+  /** Asserts the next token of the stream has the given term text and offsets. */
+  private Token assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException {
+    Token token = ts.next();
+    assertNotNull(token);
+    assertEquals(text, new String(token.termBuffer(), 0, token.termLength()));
+    assertEquals(startOffset, token.startOffset());
+    assertEquals(endOffset, token.endOffset());
+    return token;
+  }
+
+}
Index: contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java
===================================================================
--- contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java	(revision 0)
+++ contrib/analyzers/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java	(revision 0)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+
+/**
+ * Verifies that SingleTokenTokenStream produces exactly its one token
+ * and then signals exhaustion with null.
+ */
+public class TestSingleTokenTokenFilter extends TestCase {
+
+  public void test() throws IOException {
+
+    Token token = new Token();
+
+    SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
+
+    assertEquals(token, ts.next());
+    assertNull(ts.next());
+
+  }
+
+}
Index: contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
===================================================================
--- contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java	(revision 0)
+++ contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java	(revision 0)
@@ -0,0 +1,445 @@
+package org.apache.lucene.analysis.shingle;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import junit.framework.TestCase; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.PrefixableAndSuffixableTokenFilter; +import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream; +import org.apache.lucene.analysis.payloads.PayloadHelper; +import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix; +import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedList; + +public class TestShingleMatrixFilter extends TestCase { + + + /** + * Extracts a matrix from a token stream. 
+ * @throws IOException + */ + public void testTokenStream() throws IOException { + + + Token token = new Token(); // for debug use only + + + TokenStream ts; + TokenListStream tls; + LinkedList tokens; + + + + + tokens = new LinkedList(); + tokens.add(tokenFactory("hello", 1, 0, 4)); + tokens.add(tokenFactory("greetings", 0, 0, 4)); + tokens.add(tokenFactory("world", 1, 5, 10)); + tokens.add(tokenFactory("earth", 0, 5, 10)); + tokens.add(tokenFactory("tellus", 0, 5, 10)); + + tls = new TokenListStream(tokens); + ts = new ShingleMatrixFilter.ShingleMatrixSynonymToRowFilter(tls, ShingleMatrixFilter.defaultSettingsCodec); + tls = new TokenListStream(ts); + + // bi-grams + + ts = new ShingleMatrixFilter(tls, 2, 2); + + assertNext(ts, "hello_world"); + assertNext(ts, "greetings_world"); + assertNext(ts, "hello_earth"); + assertNext(ts, "greetings_earth"); + assertNext(ts, "hello_tellus"); + assertNext(ts, "greetings_tellus"); + assertNull(ts.next()); + + // bi-grams with no spacer character, start offset, end offset + + tls.reset(); + ts = tls; + ts = new ShingleMatrixFilter(ts, 2, 2, null); + + assertNext(ts, "helloworld", 0, 10); + assertNext(ts, "greetingsworld", 0, 10); + assertNext(ts, "helloearth", 0, 10); + assertNext(ts, "greetingsearth", 0, 10); + assertNext(ts, "hellotellus", 0, 10); + assertNext(ts, "greetingstellus", 0, 10); + assertNull(ts.next()); + + // add ^_prefix_and_suffix_$ + + tls.reset(); + ts = new PrefixableAndSuffixableTokenFilter(new SingleTokenTokenStream(tokenFactory("^", 1, 100f, 0, 0)), tls, new SingleTokenTokenStream(tokenFactory("$", 1, 50f, 10, 10))); + ts = new ShingleMatrixFilter.ShingleMatrixSynonymToRowFilter(ts, ShingleMatrixFilter.defaultSettingsCodec); + tls = new TokenListStream(ts); + + // bi-grams, position incrememnt, weight, start offset, end offset + + ts = new ShingleMatrixFilter(tls, 2, 2); +// +// while ((token = ts.next(token)) != null) { +// System.out.println("assertNext(ts, \"" + token.termText() + "\", " + 
token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");"); +// token.clear(); +// } + + assertNext(ts, "^_hello", 1, 71.417786f, 0, 4); + assertNext(ts, "^_greetings", 1, 71.417786f, 0, 4); + assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10); + assertNext(ts, "world_$", 1, 36.062447f, 5, 10); + assertNext(ts, "earth_$", 1, 36.062447f, 5, 10); + assertNext(ts, "tellus_$", 1, 36.062447f, 5, 10); + assertNull(ts.next()); + + // test unlimited size and allow single boundary token as shingle + tls.reset(); + ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', false); +// while ((token = ts.next(token)) != null) { +// System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? 
"1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");"); +// token.clear(); +// } + assertNext(ts, "^", 1, 100.0f, 0, 0); + assertNext(ts, "^_hello", 1, 71.417786f, 0, 4); + assertNext(ts, "^_hello_world", 1, 58.88973f, 0, 10); + assertNext(ts, "^_hello_world_$", 1, 76.0f, 0, 10); + assertNext(ts, "hello", 1, 1.0f, 0, 4); + assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_world_$", 1, 30.022215f, 0, 10); + assertNext(ts, "world", 1, 1.0f, 5, 10); + assertNext(ts, "world_$", 1, 36.062447f, 5, 10); + assertNext(ts, "$", 1, 50.0f, 10, 10); + assertNext(ts, "^_greetings", 1, 71.417786f, 0, 4); + assertNext(ts, "^_greetings_world", 1, 58.88973f, 0, 10); + assertNext(ts, "^_greetings_world_$", 1, 76.0f, 0, 10); + assertNext(ts, "greetings", 1, 1.0f, 0, 4); + assertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_world_$", 1, 30.022215f, 0, 10); + assertNext(ts, "^_hello_earth", 1, 58.88973f, 0, 10); + assertNext(ts, "^_hello_earth_$", 1, 76.0f, 0, 10); + assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_earth_$", 1, 30.022215f, 0, 10); + assertNext(ts, "earth", 1, 1.0f, 5, 10); + assertNext(ts, "earth_$", 1, 36.062447f, 5, 10); + assertNext(ts, "^_greetings_earth", 1, 58.88973f, 0, 10); + assertNext(ts, "^_greetings_earth_$", 1, 76.0f, 0, 10); + assertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_earth_$", 1, 30.022215f, 0, 10); + assertNext(ts, "^_hello_tellus", 1, 58.88973f, 0, 10); + assertNext(ts, "^_hello_tellus_$", 1, 76.0f, 0, 10); + assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_tellus_$", 1, 30.022215f, 0, 10); + assertNext(ts, "tellus", 1, 1.0f, 5, 10); + assertNext(ts, "tellus_$", 1, 36.062447f, 5, 10); + assertNext(ts, "^_greetings_tellus", 1, 58.88973f, 0, 10); + assertNext(ts, "^_greetings_tellus_$", 1, 76.0f, 0, 10); + assertNext(ts, 
"greetings_tellus", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_tellus_$", 1, 30.022215f, 0, 10); + assertNull(ts.next()); + + // test unlimited size but don't allow single boundary token as shingle + + tls.reset(); + ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', true); +// while ((token = ts.next(token)) != null) { +// System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");"); +// token.clear(); +// } + + assertNext(ts, "^_hello", 1, 71.417786f, 0, 4); + assertNext(ts, "^_hello_world", 1, 58.88973f, 0, 10); + assertNext(ts, "^_hello_world_$", 1, 76.0f, 0, 10); + assertNext(ts, "hello", 1, 1.0f, 0, 4); + assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_world_$", 1, 30.022215f, 0, 10); + assertNext(ts, "world", 1, 1.0f, 5, 10); + assertNext(ts, "world_$", 1, 36.062447f, 5, 10); + assertNext(ts, "^_greetings", 1, 71.417786f, 0, 4); + assertNext(ts, "^_greetings_world", 1, 58.88973f, 0, 10); + assertNext(ts, "^_greetings_world_$", 1, 76.0f, 0, 10); + assertNext(ts, "greetings", 1, 1.0f, 0, 4); + assertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_world_$", 1, 30.022215f, 0, 10); + assertNext(ts, "^_hello_earth", 1, 58.88973f, 0, 10); + assertNext(ts, "^_hello_earth_$", 1, 76.0f, 0, 10); + assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_earth_$", 1, 30.022215f, 0, 10); + assertNext(ts, "earth", 1, 1.0f, 5, 10); + assertNext(ts, "earth_$", 1, 36.062447f, 5, 10); + assertNext(ts, "^_greetings_earth", 1, 58.88973f, 0, 10); + assertNext(ts, "^_greetings_earth_$", 1, 76.0f, 0, 10); + assertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_earth_$", 1, 30.022215f, 0, 10); + assertNext(ts, "^_hello_tellus", 1, 58.88973f, 0, 10); + 
assertNext(ts, "^_hello_tellus_$", 1, 76.0f, 0, 10); + assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_tellus_$", 1, 30.022215f, 0, 10); + assertNext(ts, "tellus", 1, 1.0f, 5, 10); + assertNext(ts, "tellus_$", 1, 36.062447f, 5, 10); + assertNext(ts, "^_greetings_tellus", 1, 58.88973f, 0, 10); + assertNext(ts, "^_greetings_tellus_$", 1, 76.0f, 0, 10); + assertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_tellus_$", 1, 30.022215f, 0, 10); + + + assertNull(ts.next()); + + System.currentTimeMillis(); + + + // multi-token synonyms + + tokens = new LinkedList(); + tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn)); + tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow)); + tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow)); + tokens.add(tokenFactory("salutations", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow)); + tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn)); + tokens.add(tokenFactory("earth", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newRow)); + tokens.add(tokenFactory("tellus", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newRow)); + + tls = new TokenListStream(tokens); + ts = new ShingleMatrixFilter(tls, 2, 3); + +// while ((token = ts.next(token)) != null) { +// System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? 
"1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");"); +// token.clear(); +// } + + assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "greetings_and", 1, 1.4142135f, 0, 4); + assertNext(ts, "greetings_and_salutations", 1, 1.7320508f, 0, 4); + assertNext(ts, "and_salutations", 1, 1.4142135f, 0, 4); + assertNext(ts, "and_salutations_world", 1, 1.7320508f, 0, 10); + assertNext(ts, "salutations_world", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "and_salutations_earth", 1, 1.7320508f, 0, 10); + assertNext(ts, "salutations_earth", 1, 1.4142135f, 0, 10); + assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10); + assertNext(ts, "and_salutations_tellus", 1, 1.7320508f, 0, 10); + assertNext(ts, "salutations_tellus", 1, 1.4142135f, 0, 10); + assertNull(ts.next()); + + System.currentTimeMillis(); + + + } + + /** + * Tests creat shingles from a pre-assembled matrix + * + * Tests the row token z-axis, multi token synonyms. 
+ * + * @throws IOException + */ + public void testMatrix() throws IOException { + + Matrix matrix = new Matrix(); + + matrix.new Column(tokenFactory("no", 1)); + matrix.new Column(tokenFactory("surprise", 1)); + matrix.new Column(tokenFactory("to", 1)); + matrix.new Column(tokenFactory("see", 1)); + matrix.new Column(tokenFactory("england", 1)); + matrix.new Column(tokenFactory("manager", 1)); + + Column col = matrix.new Column(); + + // sven göran eriksson is a multi token synonym to svennis + col.new Row().setTokens(new Token[]{tokenFactory("svennis", 1)}); + col.new Row().setTokens(new Token[]{tokenFactory("sven", 1), tokenFactory("göran", 1), tokenFactory("eriksson", 1)}); + + matrix.new Column(tokenFactory("in", 1)); + matrix.new Column(tokenFactory("the", 1)); + matrix.new Column(tokenFactory("croud", 1)); + + TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, '_', true, ShingleMatrixFilter.defaultSettingsCodec); + + Token token = new Token(); +// while ((token = ts.next(token)) != null) { +// System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? 
"1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");"); +// token.clear(); +// } + assertNext(ts, "no_surprise", 1, 1.4142135f, 0, 0); + assertNext(ts, "no_surprise_to", 1, 1.7320508f, 0, 0); + assertNext(ts, "no_surprise_to_see", 1, 2.0f, 0, 0); + assertNext(ts, "surprise_to", 1, 1.4142135f, 0, 0); + assertNext(ts, "surprise_to_see", 1, 1.7320508f, 0, 0); + assertNext(ts, "surprise_to_see_england", 1, 2.0f, 0, 0); + assertNext(ts, "to_see", 1, 1.4142135f, 0, 0); + assertNext(ts, "to_see_england", 1, 1.7320508f, 0, 0); + assertNext(ts, "to_see_england_manager", 1, 2.0f, 0, 0); + assertNext(ts, "see_england", 1, 1.4142135f, 0, 0); + assertNext(ts, "see_england_manager", 1, 1.7320508f, 0, 0); + assertNext(ts, "see_england_manager_svennis", 1, 2.0f, 0, 0); + assertNext(ts, "england_manager", 1, 1.4142135f, 0, 0); + assertNext(ts, "england_manager_svennis", 1, 1.7320508f, 0, 0); + assertNext(ts, "england_manager_svennis_in", 1, 2.0f, 0, 0); + assertNext(ts, "manager_svennis", 1, 1.4142135f, 0, 0); + assertNext(ts, "manager_svennis_in", 1, 1.7320508f, 0, 0); + assertNext(ts, "manager_svennis_in_the", 1, 2.0f, 0, 0); + assertNext(ts, "svennis_in", 1, 1.4142135f, 0, 0); + assertNext(ts, "svennis_in_the", 1, 1.7320508f, 0, 0); + assertNext(ts, "svennis_in_the_croud", 1, 2.0f, 0, 0); + assertNext(ts, "in_the", 1, 1.4142135f, 0, 0); + assertNext(ts, "in_the_croud", 1, 1.7320508f, 0, 0); + assertNext(ts, "the_croud", 1, 1.4142135f, 0, 0); + assertNext(ts, "see_england_manager_sven", 1, 2.0f, 0, 0); + assertNext(ts, "england_manager_sven", 1, 1.7320508f, 0, 0); + assertNext(ts, "england_manager_sven_göran", 1, 2.0f, 0, 0); + assertNext(ts, "manager_sven", 1, 1.4142135f, 0, 0); + assertNext(ts, "manager_sven_göran", 1, 1.7320508f, 0, 0); + assertNext(ts, "manager_sven_göran_eriksson", 1, 2.0f, 0, 0); + assertNext(ts, "sven_göran", 1, 1.4142135f, 0, 0); + assertNext(ts, "sven_göran_eriksson", 1, 
1.7320508f, 0, 0); + assertNext(ts, "sven_göran_eriksson_in", 1, 2.0f, 0, 0); + assertNext(ts, "göran_eriksson", 1, 1.4142135f, 0, 0); + assertNext(ts, "göran_eriksson_in", 1, 1.7320508f, 0, 0); + assertNext(ts, "göran_eriksson_in_the", 1, 2.0f, 0, 0); + assertNext(ts, "eriksson_in", 1, 1.4142135f, 0, 0); + assertNext(ts, "eriksson_in_the", 1, 1.7320508f, 0, 0); + assertNext(ts, "eriksson_in_the_croud", 1, 2.0f, 0, 0); + + assertNull(ts.next()); + + } + + private Token tokenFactory(String text, int startOffset, int endOffset) { + return tokenFactory(text, 1, 1f, startOffset, endOffset); + } + + + private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) { + return tokenFactory(text, posIncr, 1f, startOffset, endOffset); + } + + + private Token tokenFactory(String text, int posIncr) { + return tokenFactory(text, posIncr, 1f, 0, 0); + } + + private Token tokenFactory(String text, int posIncr, float weight) { + return tokenFactory(text, posIncr, weight, 0, 0); + } + + private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) { + Token token = new Token(); + token.setTermText(text); + token.setPositionIncrement(posIncr); + ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); + token.setStartOffset(startOffset); + token.setEndOffset(endOffset); + return token; + } + + private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset, ShingleMatrixFilter.TokenPositioner positioner) { + Token token = new Token(); + token.setTermText(text); + token.setPositionIncrement(posIncr); + ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); + token.setStartOffset(startOffset); + token.setEndOffset(endOffset); + ShingleMatrixFilter.defaultSettingsCodec.setTokenPositioner(token, positioner); + return token; + } + + // assert-methods start here + + private Token assertNext(TokenStream ts, String text) throws IOException { + Token token = ts.next(); + 
assertNotNull(token); + assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); + return token; + } + + private Token assertNext(TokenStream ts, String text, int positionIncrement, float boost) throws IOException { + Token token = ts.next(); + assertNotNull(token); + assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); + assertEquals(positionIncrement, token.getPositionIncrement()); + assertEquals(boost, token.getPayload() == null ? 1f : PayloadHelper.decodeFloat(token.getPayload().getData())); + return token; + } + + private Token assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException { + Token token = ts.next(); + assertNotNull(token); + assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); + assertEquals(positionIncrement, token.getPositionIncrement()); + assertEquals(boost, token.getPayload() == null ? 1f : PayloadHelper.decodeFloat(token.getPayload().getData())); + assertEquals(startOffset, token.startOffset()); + assertEquals(endOffset, token.endOffset()); + return token; + } + + private Token assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException { + Token token = ts.next(); + assertNotNull(token); + assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); + assertEquals(startOffset, token.startOffset()); + assertEquals(endOffset, token.endOffset()); + return token; + } + + + public static class TokenListStream extends TokenStream { + + private Collection tokens; + + public TokenListStream(TokenStream ts) throws IOException { + tokens = new ArrayList(); + Token token; + while ((token = ts.next(new Token())) != null) { + tokens.add(token); + } + } + + public TokenListStream(Collection tokens) { + this.tokens = tokens; + } + + private Iterator iterator; + + public Token next() throws IOException { + if (iterator == null) { + iterator = tokens.iterator(); + } + if 
(!iterator.hasNext()) { + return null; + } + return iterator.next(); + } + + + public void reset() throws IOException { + iterator = null; + } + } + +} Index: contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java =================================================================== --- contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (revision 0) +++ contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (revision 0) @@ -0,0 +1,664 @@ +package org.apache.lucene.analysis.shingle; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.payloads.PayloadHelper; +import org.apache.lucene.index.Payload; + +import java.io.IOException; +import java.util.*; + + +public class ShingleMatrixFilter extends TokenStream { + + public static Character defaultspacerCharacter = '_'; + public static TokenSettingsCodec defaultSettingsCodec = new DefaultTokenSettingsCodec(); + public static boolean ignoringSinglePrefixOrSuffixShingleByDefault = false; + + + private TokenSettingsCodec settingsCodec; + + private int minimumShingleSize; + private int maximumShingleSize; + + private boolean ignoringSinglePrefixOrSuffixShingle = false; + + private Character spacerCharacter = '_'; + + private TokenStream input; + + public static enum TokenPositioner { + newColumn(0), newRow(1), sameRow(2); + + private final int index; + + private TokenPositioner(int index) { + this.index = index; + } + + public int getIndex() { + return index; + } + } + + + public ShingleMatrixFilter(Matrix matrix, int minimumShingleSize, int maximumShingleSize, Character spacerCharacter, boolean ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec) { + this.matrix = matrix; + this.minimumShingleSize = minimumShingleSize; + this.maximumShingleSize = maximumShingleSize; + this.spacerCharacter = spacerCharacter; + this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle; + this.settingsCodec = settingsCodec; + + this.input = new TokenStream() { + public Token next(Token result) throws IOException { + return null; + } + }; + } + + public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize) { + this(input, minimumShingleSize, maximumShingleSize, defaultspacerCharacter); + } + + + public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Character spacerCharacter) { + 
this(input, minimumShingleSize, maximumShingleSize, spacerCharacter, ignoringSinglePrefixOrSuffixShingleByDefault); + } + + + public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Character spacerCharacter, boolean ignoringSinglePrefixOrSuffixShingle) { + this(input, minimumShingleSize, maximumShingleSize, spacerCharacter, ignoringSinglePrefixOrSuffixShingle, defaultSettingsCodec); + } + + public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Character spacerCharacter, boolean ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec) { + this.input = input; + this.minimumShingleSize = minimumShingleSize; + this.maximumShingleSize = maximumShingleSize; + this.spacerCharacter = spacerCharacter; + this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle; + this.settingsCodec = settingsCodec; + } + + + public static abstract class TokenSettingsCodec { + public abstract TokenPositioner getTokenPositioner(Token token) throws IOException; + + public abstract void setTokenPositioner(Token token, ShingleMatrixFilter.TokenPositioner tokenPositioner); + + public abstract float getWeight(Token token); + + public abstract void setWeight(Token token, float weight); + } + + + private Iterator permutations; + //private Matrix.Column.Row[] currentShingle; + + /** the current permutation of tokens used to produce shingles */ + private List currentPermuationTokens; + /** index to what row a token in currentShingleTokens represents*/ + private List currentPermutationRows; + + private int currentPermutationStartOffset; + private int currentShingleLength; + + private Set> shinglesSeen = new HashSet>(); + + + public void setInput(TokenStream input) { + this.input = input; + } + + public void reset() throws IOException { + permutations = null; + shinglesSeen.clear(); + } + + private Matrix matrix; + + + public Token next(Token token) throws IOException { + if (matrix == null) 
{ + matrix = new Matrix(); + // fill matrix with maximumShingleSize columns + while (matrix.columns.size() < maximumShingleSize && readColumn(input)) { + // this loop looks ugly + } + } + + if (currentPermuationTokens != null) { + currentShingleLength++; + if (currentShingleLength + currentPermutationStartOffset <= currentPermuationTokens.size() + && currentShingleLength <= maximumShingleSize) { + + if (ignoringSinglePrefixOrSuffixShingle + && currentShingleLength == 1 + && (currentPermutationRows.get(currentPermutationStartOffset).getColumn().isFirst() || currentPermutationRows.get(currentPermutationStartOffset).getColumn().isLast())) { + return next(token); + } + + int termLength = 0; + + List shingle = new ArrayList(); + + for (int i = 0; i < currentShingleLength; i++) { + Token shingleToken = currentPermuationTokens.get(i + currentPermutationStartOffset); + termLength += shingleToken.termLength(); + shingle.add(shingleToken); + } + if (spacerCharacter != null) { + termLength += currentShingleLength - 1; + } + + // only produce shingles that not already has been created + if (!shinglesSeen.add(shingle)) { + return next(token); + } + + // shingle token factory + StringBuilder sb = new StringBuilder(termLength + 10); // paranormal abillity to forsay the future. 
+ + for (Token shingleToken : shingle) { + + if (spacerCharacter != null && sb.length() > 0) { + sb.append(spacerCharacter); + } + sb.append(shingleToken.termBuffer(), 0, shingleToken.termLength()); + + } + token.setTermText(sb.toString()); + updateToken(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens); + + return token; + + } else { + + if (currentPermutationStartOffset < currentPermuationTokens.size() - 1 ) { + currentPermutationStartOffset++; + currentShingleLength = minimumShingleSize - 1; + return next(token); + } + + + if (permutations == null) { + return null; + } + + + + if (!permutations.hasNext()) { + // add one more row (if available) and delete the first one + currentShingleLength++; + if (readColumn(input)) { + // well just read it + } + + Matrix.Column deletedColumn = matrix.columns.remove(0); + + // remove all seen shingles that contains permutations with tokens from the deleted column. + List deletedColumnTokens = new ArrayList(); + for (Matrix.Column.Row row : deletedColumn.getRows()) { + for (Token shingleToken : row.getTokens()) { + deletedColumnTokens.add(shingleToken); + } + } + for (Iterator> shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) { + List shingle = shinglesSeenIterator.next(); + for (Token deletedColumnToken : deletedColumnTokens) { + if (shingle.contains(deletedColumnToken)) { + shinglesSeenIterator.remove(); + break; + } + } + } + + + if (matrix.columns.size() < minimumShingleSize) { + return null; + } + permutations = matrix.permutationIterator(); + + } + + + nextTokensPermutation(); + + currentPermutationStartOffset = 0; + +// if (currentPermutationStartOffset < currentPermuationTokens.size()) { + currentShingleLength = minimumShingleSize - 1; + return next(token); +// } + } + } + + if (permutations == null) { + permutations = matrix.permutationIterator(); + } + + if (!permutations.hasNext()) { + return null; + } + + nextTokensPermutation(); + 
currentPermutationStartOffset = 0; + currentShingleLength = minimumShingleSize - 1; + + return next(token); + } + + private void nextTokensPermutation() { + Matrix.Column.Row[] rowsPermutation = permutations.next(); + List currentPermutationRows = new ArrayList(); + List currentPermuationTokens = new ArrayList(); + for (Matrix.Column.Row row : rowsPermutation) { + for (Token shingleToken : row.getTokens()) { + currentPermuationTokens.add(shingleToken); + currentPermutationRows.add(row); + } + } + this.currentPermuationTokens = currentPermuationTokens; + this.currentPermutationRows = currentPermutationRows; + } + + public void updateToken(Token token, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) { + token.setType(ShingleMatrixFilter.class.getSimpleName()); + token.setFlags(0); + token.setPositionIncrement(1); + token.setStartOffset(shingle.get(0).startOffset()); + token.setEndOffset(shingle.get(shingle.size() - 1).endOffset()); + settingsCodec.setWeight(token, calculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens)); + } + + public float calculateShingleWeight(Token token, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) { + float weight = 0f; + float norm = (float) (1d / Math.sqrt(shingle.size())); + for (Token shingleToken : shingle) { + weight += settingsCodec.getWeight(shingleToken) * norm; + } + return weight; + } + + + private Token buf; + + /** + * @param ts the matrix source input stream + * @return true if it manage to read one more column from the token stream + * @throws IOException if the matrix source input stream throws an exception + */ + private boolean readColumn(TokenStream ts) throws IOException { + + Token token; + if (buf != null) { + token = buf; + buf = null; + } else { + token = ts.next(new Token()); + } + + if (token == null) { + return false; + } + + 
Matrix.Column currentReaderColumn = matrix.new Column(); + Matrix.Column.Row currentReaderRow = currentReaderColumn.new Row(); + + List tokens = new ArrayList(); + tokens.add(token); + TokenPositioner tokenPositioner; + while ((buf = ts.next(new Token())) != null + && (tokenPositioner = settingsCodec.getTokenPositioner(buf)) != TokenPositioner.newColumn) { + + if (tokenPositioner == TokenPositioner.sameRow) { + tokens.add(buf); + } else /*if (tokenPositioner == TokenPositioner.newRow)*/ { + currentReaderRow.setTokens(tokens.toArray(new Token[tokens.size()])); + tokens.clear(); + tokens.add(buf); + currentReaderRow = currentReaderColumn.new Row(); + } + buf = null; + + } + + currentReaderRow.setTokens(tokens.toArray(new Token[tokens.size()])); + + if (buf == null) { + buf = input.next(new Token()); + if (buf == null) { + currentReaderColumn.setLast(true); + } + } + + + return true; + + } + + + /** + * A column focused matrix in three dimensions: + * + * Token[column][row][z-axis] { + * {{hello}, {greetings, and, salutations}}, + * {{world}, {earth}, {tellus}} + * }; + * + * todo consider row groups + * to indicate that shingles is only to contain permutations with texts in that same row group. 
+ * + */ + public static class Matrix { + + private boolean columnsHasBeenCreated = false; + + private List columns = new ArrayList(); + + public List getColumns() { + return columns; + } + + public class Column { + + private boolean last; + private boolean first; + + public Matrix getMatrix() { + return Matrix.this; + } + + public Column(Token token) { + this(); + Row row = new Row(); + row.setTokens(new Token[]{token}); + } + + + public Column() { + synchronized (Matrix.this) { + if (!columnsHasBeenCreated) { + this.setFirst(true); + columnsHasBeenCreated = true; + } + } + Matrix.this.columns.add(this); + } + + private List rows = new ArrayList(); + + public List getRows() { + return rows; + } + + + public int getIndex() { + return Matrix.this.columns.indexOf(this); + } + + public String toString() { + return "Column{" + + "first=" + first + + ", last=" + last + + ", rows=" + rows + + '}'; + } + + public boolean isFirst() { + return first; + } + + public void setFirst(boolean first) { + this.first = first; + } + + public void setLast(boolean last) { + this.last = last; + } + + public boolean isLast() { + return last; + } + + public class Row { + + public Column getColumn() { + return Column.this; + } + + private Token[] tokens; + + public Row() { + Column.this.rows.add(this); + } + + public int getIndex() { + return Column.this.rows.indexOf(this); + } + + + public Token[] getTokens() { + return tokens; + } + + + public void setTokens(Token[] tokens) { + this.tokens = tokens; + } + +// public int getStartOffset() { +// int ret = tokens[0].startOffset(); +// if (getIndex() > 0 && ret == 0) { +// ret = Column.this.rows.get(0).getStartOffset(); +// } +// return ret; +// } +// +// public int getEndOffset() { +// int ret = tokens[tokens.length - 1].endOffset(); +// if (getIndex() > 0 && ret == 0) { +// ret = Column.this.rows.get(0).getEndOffset(); +// } +// return ret; +// } + + public String toString() { + return "Row{" + + "index=" + getIndex() + + ", tokens=" + 
(tokens == null ? null : Arrays.asList(tokens)) + + '}'; + } + } + + } + + + public Iterator permutationIterator() { + + return new Iterator() { + + private int[] columnRowCounters = new int[columns.size()]; + + public void remove() { + throw new IllegalStateException("not implemented"); + } + + public boolean hasNext() { + int s = columnRowCounters.length; + return columnRowCounters[s - 1] < columns.get(s - 1).getRows().size(); + } + + public Column.Row[] next() { + if (!hasNext()) { + throw new NoSuchElementException("no more elements"); + } + + Column.Row[] rows = new Column.Row[columnRowCounters.length]; + + for (int i = 0; i < columnRowCounters.length; i++) { + rows[i] = columns.get(i).rows.get(columnRowCounters[i]); + } + incrementColumnRowCounters(); + + return rows; + } + + private void incrementColumnRowCounters() { + for (int i = 0; i < columnRowCounters.length; i++) { + columnRowCounters[i]++; + if (columnRowCounters[i] == columns.get(i).rows.size() && + i < columnRowCounters.length - 1) { + columnRowCounters[i] = 0; + } else { + break; + } + } + } + }; + } + + public String toString() { + return "Matrix{" + + "columns=" + columns + + '}'; + } + } + + + public int getMinimumShingleSize() { + return minimumShingleSize; + } + + public void setMinimumShingleSize(int minimumShingleSize) { + this.minimumShingleSize = minimumShingleSize; + } + + public int getMaximumShingleSize() { + return maximumShingleSize; + } + + public void setMaximumShingleSize(int maximumShingleSize) { + this.maximumShingleSize = maximumShingleSize; + } + + + public Matrix getMatrix() { + return matrix; + } + + public void setMatrix(Matrix matrix) { + this.matrix = matrix; + } + + public Character getSpacerCharacter() { + return spacerCharacter; + } + + public void setSpacerCharacter(Character spacerCharacter) { + this.spacerCharacter = spacerCharacter; + } + + public boolean isIgnoringSinglePrefixOrSuffixShingle() { + return ignoringSinglePrefixOrSuffixShingle; + } + + public void 
setIgnoringSinglePrefixOrSuffixShingle(boolean ignoringSinglePrefixOrSuffixShingle) { + this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle; + } + + + /** + * A very simple codec only to be used for something serious + */ + public static class DefaultTokenSettingsCodec extends TokenSettingsCodec { + public TokenPositioner getTokenPositioner(Token token) throws IOException { + switch (token.getFlags()) { + case 0: + return TokenPositioner.newColumn; + case 1: + return TokenPositioner.newRow; + case 2: + return TokenPositioner.sameRow; + } + throw new IOException("Unknown matrix positioning of token " + token); + } + + public void setTokenPositioner(Token token, TokenPositioner tokenPositioner) { + token.setFlags(tokenPositioner.getIndex()); + } + + public float getWeight(Token token) { + if (token.getPayload() == null || token.getPayload().getData() == null) { + return 1f; + } else { + return PayloadHelper.decodeFloat(token.getPayload().getData()); + } + } + + public void setWeight(Token token, float weight) { + if (weight == 1f) { + token.setPayload(null); + } else { + token.setPayload(new Payload(PayloadHelper.encodeFloat(weight))); + } + } + + } + + + /** + * Adds synonyms as rows in the current column in the matrix. 
+ */ + public static class ShingleMatrixSynonymToRowFilter extends TokenFilter { + + private TokenSettingsCodec codec; + + public ShingleMatrixSynonymToRowFilter(TokenStream input, TokenSettingsCodec shingleMatrixFilter) { + super(input); + this.codec = shingleMatrixFilter; + } + + public Token next(Token result) throws IOException { + result = input.next(result); + if (result == null) { + return null; + } + if (result.getPositionIncrement() == 0) { + codec.setTokenPositioner(result, TokenPositioner.newRow); + } else { + codec.setTokenPositioner(result, TokenPositioner.newColumn); + } + return result; + } + + + public void setCodec(TokenSettingsCodec codec) { + this.codec = codec; + } + + public TokenSettingsCodec getCodec() { + return codec; + } + } + + +} Index: contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java =================================================================== --- contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (revision 0) +++ contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (revision 0) @@ -0,0 +1,59 @@ +package org.apache.lucene.analysis.miscellaneous; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; + +import java.io.IOException; + +/** + * A token stream containing a single token. + */ +public class SingleTokenTokenStream extends TokenStream { + + private boolean exhausted = false; + private Token token; + + + public SingleTokenTokenStream(Token token) { + this.token = token; + } + + + public Token next(Token result) throws IOException { + if (exhausted) { + return null; + } + exhausted = true; + return token; + } + + + public void reset() throws IOException { + exhausted = false; + } + + public Token getToken() { + return token; + } + + public void setToken(Token token) { + this.token = token; + } +} Index: contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixableAndSuffixableTokenFilter.java =================================================================== --- contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixableAndSuffixableTokenFilter.java (revision 0) +++ contrib/analyzers/src/java/org/apache/lucene/analysis/miscellaneous/PrefixableAndSuffixableTokenFilter.java (revision 0) @@ -0,0 +1,135 @@ +package org.apache.lucene.analysis.miscellaneous; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; + +import java.io.IOException; + + +public class PrefixableAndSuffixableTokenFilter extends TokenFilter { + + private TokenStream prefix; + private TokenStream suffix; + + + public PrefixableAndSuffixableTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) { + super(input); + this.prefix = prefix; + this.suffix = suffix; + + prefixExhausted = prefix == null; + suffixExhausted = suffix == null; + } + + private boolean prefixExhausted; + private boolean suffixExhausted; + + private Token firstInputToken; + private Token lastInputToken; + + private Token buf; + + public Token next(Token result) throws IOException { + if (!prefixExhausted) { + + if (firstInputToken == null) { + firstInputToken = buf = input.next(); + } + + result = prefix.next(result); + if (result == null) { + prefixExhausted = true; + } else { + return updatePrefixToken(result, firstInputToken); + } + } + + if (buf != null) { + result = buf; + buf = null; + } else { + result = input.next(result); + } + + if (result != null) { + lastInputToken = (Token) result.clone(); // todo optimize! ugh!! no good to clone all tokens in the input! 
+ return result; + } + + if (!suffixExhausted) { + result = suffix.next(result); + if (result == null) { + suffixExhausted = true; + return null; + } else { + return updateSuffixToken(result, lastInputToken); + } + } else { + return null; + } + } + + + public Token updatePrefixToken(Token prefixToken, Token firstInputToken) { + prefixToken = (Token) prefixToken.clone(); + prefixToken.setStartOffset(firstInputToken.startOffset()); + prefixToken.setEndOffset(firstInputToken.startOffset()); + return prefixToken; + } + + public Token updateSuffixToken(Token suffixToken, Token lastInputToken) { + suffixToken = (Token) suffixToken.clone(); + suffixToken.setStartOffset(lastInputToken.endOffset()); + suffixToken.setEndOffset(lastInputToken.endOffset()); + return suffixToken; + } + + + public void reset() throws IOException { + super.reset(); + if (prefix != null) { + prefixExhausted = false; + prefix.reset(); + } + if (suffix != null) { + suffixExhausted = false; + suffix.reset(); + } + + } + + + public TokenStream getPrefix() { + return prefix; + } + + public void setPrefix(TokenStream prefix) { + this.prefix = prefix; + } + + public TokenStream getSuffix() { + return suffix; + } + + public void setSuffix(TokenStream suffix) { + this.suffix = suffix; + } +}