Index: solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java (revision 940782) +++ solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java (working copy) @@ -26,7 +26,8 @@ import java.util.Iterator; import java.util.Arrays; -public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase { +/** Simple tests to ensure this factory is working */ +public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenTestCase { public static Token tok(int pos, String t, int start, int end) { Token tok = new Token(t,start,end); @@ -63,23 +64,8 @@ assertTokenStreamContents(ts, expected.split("\\s")); } - - public void testNoDups() throws Exception { - - testDups("A B B C D E" - ,tok(1,"A", 0, 4) - ,tok(1,"B", 5, 10) - ,tok(1,"B",11, 15) - ,tok(1,"C",16, 20) - ,tok(0,"D",16, 20) - ,tok(1,"E",21, 25) - ); - - } - - + public void testSimpleDups() throws Exception { - testDups("A B C D E" ,tok(1,"A", 0, 4) ,tok(1,"B", 5, 10) @@ -87,34 +73,6 @@ ,tok(1,"C",16, 20) ,tok(0,"D",16, 20) ,tok(1,"E",21, 25) - ); - + ); } - - public void testComplexDups() throws Exception { - - testDups("A B C D E F G H I J K" - ,tok(1,"A") - ,tok(1,"B") - ,tok(0,"B") - ,tok(1,"C") - ,tok(1,"D") - ,tok(0,"D") - ,tok(0,"D") - ,tok(1,"E") - ,tok(1,"F") - ,tok(0,"F") - ,tok(1,"G") - ,tok(0,"H") - ,tok(0,"H") - ,tok(1,"I") - ,tok(1,"J") - ,tok(0,"K") - ,tok(0,"J") - ); - - } - - - } Index: solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java (revision 940782) +++ solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java (working copy) @@ -18,6 +18,7 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter; /** * @version $Id:$ Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 940782) +++ lucene/contrib/CHANGES.txt (working copy) @@ -163,6 +163,8 @@ constructs. - o.a.l.analysis.miscellaneous.WordDelimiterFilter: TokenFilter that splits words into subwords and performs optional transformations on subword groups. + - o.a.l.analysis.miscellaneous.RemoveDuplicatesTokenFilter: TokenFilter which + filters out Tokens at the same position and Term text as the previous token. (... in progress) Build Index: lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java (revision 940782) +++ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java (working copy) @@ -15,8 +15,9 @@ * limitations under the License. */ -package org.apache.solr.analysis; +package org.apache.lucene.analysis.miscellaneous; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -26,7 +27,7 @@ import java.util.Iterator; import java.util.Arrays; -public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase { +public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase { public static Token tok(int pos, String t, int start, int end) { Token tok = new Token(t,start,end); @@ -41,8 +42,7 @@ throws Exception { final Iterator toks = Arrays.asList(tokens).iterator(); - RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory(); - final TokenStream ts = factory.create + final TokenStream ts = new RemoveDuplicatesTokenFilter( (new TokenStream() { CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @@ -59,7 +59,7 @@ return false; } } - }); + })); assertTokenStreamContents(ts, expected.split("\\s")); } Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java (revision 940782) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java (working copy) @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.solr.analysis; +package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenFilter;