Index: contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java =================================================================== --- contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (revision 0) +++ contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.analysis.ngram; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceTokenizer; + +import java.io.StringReader; + +import junit.framework.TestCase; + +/** + * Tests {@link EdgeNGramTokenFilter} for correctness. + * @author Otis Gospodnetic + */ +public class EdgeNGramTokenFilterTest extends TestCase { + private TokenStream input; + + public void setUp() { + input = new WhitespaceTokenizer(new StringReader("abcde")); + } + + public void testInvalidInput() throws Exception { + boolean gotException = false; + try { + new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 0, 0); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testInvalidInput2() throws Exception { + boolean gotException = false; + try { + new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 2, 1); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testInvalidInput3() throws Exception { + boolean gotException = false; + try { + new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, -1, 2); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testFrontUnigram() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 1); + Token token = null; + token = tokenizer.next(); + assertEquals("(a,0,1)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } + + public void testBackUnigram() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 1); + Token token = null; + token = tokenizer.next(); + assertEquals("(e,4,5)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } + + public void testOversizedNgrams() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 6, 6); + Token token = null; + token = tokenizer.next(); + assertNull(token); + } + + public void testFrontRangeOfNgrams() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 3); + Token token = null; + token = tokenizer.next(); + assertEquals("(a,0,1)", token.toString()); + token = tokenizer.next(); + assertEquals("(ab,0,2)", token.toString()); + token = tokenizer.next(); + assertEquals("(abc,0,3)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } + + public void testBackRangeOfNgrams() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 3); + Token token = null; + token = tokenizer.next(); + assertEquals("(e,4,5)", token.toString()); + token = tokenizer.next(); + assertEquals("(de,3,5)", token.toString()); + token = tokenizer.next(); + assertEquals("(cde,2,5)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } +} Index: contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java =================================================================== --- contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (revision 0) +++ contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (revision 0) @@ -0,0 +1,139 @@ +package org.apache.lucene.analysis.ngram; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceTokenizer; + +import java.io.StringReader; +import java.util.ArrayList; + +import junit.framework.TestCase; + +/** + * Tests {@link NGramTokenFilter} for correctness. + * @author Otis Gospodnetic + */ +public class NGramTokenFilterTest extends TestCase { + private TokenStream input; + private ArrayList tokens = new ArrayList(); + + public void setUp() { + input = new WhitespaceTokenizer(new StringReader("abcde")); + } + + public void testInvalidInput() throws Exception { + boolean gotException = false; + try { + new NGramTokenFilter(input, 2, 1); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testInvalidInput2() throws Exception { + boolean gotException = false; + try { + new NGramTokenFilter(input, 0, 1); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testUnigrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertEquals(5, tokens.size()); + ArrayList exp = new ArrayList(); + exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)"); + assertEquals(exp, tokens); + } + + public void testBigrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertEquals(4, tokens.size()); + ArrayList exp = new ArrayList(); + exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)"); + assertEquals(exp, tokens); + } + + public void testNgrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertEquals(12, tokens.size()); + ArrayList exp = new ArrayList(); + exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)"); + exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)"); + exp.add("(abc,0,3)"); exp.add("(bcd,1,4)"); exp.add("(cde,2,5)"); + assertEquals(exp, tokens); + } + + public void testOversizedNgrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertTrue(tokens.isEmpty()); + } +} Index: contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java =================================================================== --- contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (revision 0) +++ contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.analysis.ngram; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceTokenizer; + +import java.io.StringReader; + +import junit.framework.TestCase; + +/** + * Tests {@link EdgeNGramTokenFilter} for correctness. + * @author Otis Gospodnetic + */ +public class EdgeNGramTokenFilterTest extends TestCase { + private TokenStream input; + + public void setUp() { + input = new WhitespaceTokenizer(new StringReader("abcde")); + } + + public void testInvalidInput() throws Exception { + boolean gotException = false; + try { + new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 0, 0); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testInvalidInput2() throws Exception { + boolean gotException = false; + try { + new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 2, 1); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testInvalidInput3() throws Exception { + boolean gotException = false; + try { + new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, -1, 2); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testFrontUnigram() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 1); + Token token = null; + token = tokenizer.next(); + assertEquals("(a,0,1)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } + + public void testBackUnigram() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 1); + Token token = null; + token = tokenizer.next(); + assertEquals("(e,4,5)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } + + public void testOversizedNgrams() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 6, 6); + Token token = null; + token = tokenizer.next(); + assertNull(token); + } + + public void testFrontRangeOfNgrams() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 3); + Token token = null; + token = tokenizer.next(); + assertEquals("(a,0,1)", token.toString()); + token = tokenizer.next(); + assertEquals("(ab,0,2)", token.toString()); + token = tokenizer.next(); + assertEquals("(abc,0,3)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } + + public void testBackRangeOfNgrams() throws Exception { + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 3); + Token token = null; + token = tokenizer.next(); + assertEquals("(e,4,5)", token.toString()); + token = tokenizer.next(); + assertEquals("(de,3,5)", token.toString()); + token = tokenizer.next(); + assertEquals("(cde,2,5)", token.toString()); + token = tokenizer.next(); + assertNull(token); + } +} Index: contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java =================================================================== --- contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (revision 0) +++ contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (revision 0) @@ -0,0 +1,139 @@ +package org.apache.lucene.analysis.ngram; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceTokenizer; + +import java.io.StringReader; +import java.util.ArrayList; + +import junit.framework.TestCase; + +/** + * Tests {@link NGramTokenFilter} for correctness. + * @author Otis Gospodnetic + */ +public class NGramTokenFilterTest extends TestCase { + private TokenStream input; + private ArrayList tokens = new ArrayList(); + + public void setUp() { + input = new WhitespaceTokenizer(new StringReader("abcde")); + } + + public void testInvalidInput() throws Exception { + boolean gotException = false; + try { + new NGramTokenFilter(input, 2, 1); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testInvalidInput2() throws Exception { + boolean gotException = false; + try { + new NGramTokenFilter(input, 0, 1); + } catch (IllegalArgumentException e) { + gotException = true; + } + assertTrue(gotException); + } + + public void testUnigrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertEquals(5, tokens.size()); + ArrayList exp = new ArrayList(); + exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)"); + assertEquals(exp, tokens); + } + + public void testBigrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertEquals(4, tokens.size()); + ArrayList exp = new ArrayList(); + exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)"); + assertEquals(exp, tokens); + } + + public void testNgrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertEquals(12, tokens.size()); + ArrayList exp = new ArrayList(); + exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)"); + exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)"); + exp.add("(abc,0,3)"); exp.add("(bcd,1,4)"); exp.add("(cde,2,5)"); + assertEquals(exp, tokens); + } + + public void testOversizedNgrams() throws Exception { + NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7); + + Token token = null; + do { + token = filter.next(); + if (token != null) { + tokens.add(token.toString()); +// System.out.println(token.termText()); +// System.out.println(token); +// Thread.sleep(1000); + } + } while (token != null); + + assertTrue(tokens.isEmpty()); + } +}