Index: solr/core/src/java/org/apache/solr/analysis/CombiningFilterFactory.java
===================================================================
--- solr/core/src/java/org/apache/solr/analysis/CombiningFilterFactory.java (revision 0)
+++ solr/core/src/java/org/apache/solr/analysis/CombiningFilterFactory.java (revision 0)
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+//Apache imports
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.solr.analysis.BaseTokenFilterFactory;
+import org.apache.lucene.analysis.combining.CombiningFilter;
+
+/**
+ * Factory that wraps a {@link TokenStream} in a {@link CombiningFilter}.
+ *
+ * @author mattmann
+ *
+ */
+public class CombiningFilterFactory extends BaseTokenFilterFactory {
+
+  /**
+   * Wraps the given stream in a new {@link CombiningFilter}.
+   *
+   * @param in the upstream token stream to be combined
+   * @return a new {@link CombiningFilter} reading from {@code in}
+   */
+  @Override
+  public TokenStream create(TokenStream in) {
+    return new CombiningFilter(in);
+  }
+
+}
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/combining/CombiningFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/combining/CombiningFilter.java (revision 0)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/combining/CombiningFilter.java (revision 0)
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.combining;
+
+//JDK imports
+import java.io.IOException;
+
+//Apache imports
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+/**
+ * A {@link TokenFilter} that reconstitutes every token of the wrapped
+ * {@link TokenStream} into a single token whose term is the concatenation
+ * of the input terms, in order and with no separator.
+ *
+ * We need this to do stop word analysis on a set of keywords, but to sort
+ * correctly.
+ *
+ * Exactly one token is produced per pass over the input, even when the
+ * input yields no tokens at all (the combined term is then empty).
+ *
+ * @author mattmann
+ *
+ */
+public class CombiningFilter extends TokenFilter {
+
+  /** Term attribute shared with the wrapped stream; looked up only once. */
+  private final TermAttribute termAtt;
+
+  /** True until the combined token has been emitted for the current pass. */
+  private boolean firstCall;
+
+  /**
+   * Creates a filter that combines all tokens of {@code in}.
+   *
+   * @param in the token stream whose terms are concatenated
+   */
+  public CombiningFilter(TokenStream in) {
+    super(in);
+    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+    firstCall = true;
+  }
+
+  /**
+   * Drains the wrapped stream on the first call and emits the concatenated
+   * term as a single token; every later call reports the end of the stream
+   * until {@link #reset()} is invoked.
+   *
+   * @return {@code true} on the first call after construction or
+   *         {@link #reset()}, {@code false} otherwise
+   * @throws IOException if the wrapped stream fails while being consumed
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!firstCall) {
+      return false;
+    }
+
+    final StringBuilder builder = new StringBuilder();
+    while (input.incrementToken()) {
+      builder.append(termAtt.term());
+    }
+
+    // The combined token is a brand new token: wipe whatever state the last
+    // input token left in the shared attributes before populating it.
+    clearAttributes();
+    termAtt.setTermBuffer(builder.toString());
+    firstCall = false;
+    return true;
+  }
+
+  /**
+   * Resets the wrapped stream and re-arms this filter so that the next call
+   * to {@link #incrementToken()} emits a combined token again.
+   *
+   * @throws IOException if resetting the wrapped stream fails
+   */
+  @Override
+  public void reset() throws IOException {
+    input.reset();
+    firstCall = true;
+  }
+
+}
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/combining/TestCombiningFilter.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/combining/TestCombiningFilter.java (revision 0)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/combining/TestCombiningFilter.java (revision 0)
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.combining;
+
+//JDK imports
+import java.io.StringReader;
+
+//Apache imports
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+//Junit imports
+import junit.framework.TestCase;
+
+/**
+ * Test harness for the {@link CombiningFilter}.
+ *
+ * @author mattmann
+ *
+ */
+public class TestCombiningFilter extends TestCase {
+
+  private static final String TEST_TXT = "The Bone Collector";
+
+  private static final String EXPECTED = "thebonecollector";
+
+  /**
+   * After {@link CombiningFilter#reset()} the filter must emit a combined
+   * token again.
+   */
+  public void testFilterReset() throws Exception {
+    TokenStream ts = new CombiningFilter(newLowerCasedStream());
+    try {
+      assertNextTerm(EXPECTED, ts);
+      assertFalse(ts.incrementToken());
+      ts.reset();
+      assertTrue(ts.incrementToken());
+      // TokenStream workflow: end() must be called before close().
+      ts.end();
+    } finally {
+      ts.close();
+    }
+  }
+
+  /**
+   * Checks the individual lower-cased tokens first, then verifies that the
+   * same input is collapsed into one token by {@link CombiningFilter}.
+   */
+  public void testFilter() throws Exception {
+    TokenStream ts = newLowerCasedStream();
+    try {
+      assertNextTerm("the", ts);
+      assertNextTerm("bone", ts);
+      assertNextTerm("collector", ts);
+      assertFalse(ts.incrementToken());
+      ts.end();
+    } finally {
+      ts.close();
+    }
+
+    ts = new CombiningFilter(newLowerCasedStream());
+    try {
+      assertNextTerm(EXPECTED, ts);
+      assertFalse(ts.incrementToken());
+      ts.end();
+    } finally {
+      ts.close();
+    }
+  }
+
+  /** Builds a whitespace-tokenized, lower-cased stream over TEST_TXT. */
+  private static TokenStream newLowerCasedStream() {
+    TokenStream ts = new WhitespaceTokenizer(new StringReader(TEST_TXT));
+    return new LowerCaseFilter(ts);
+  }
+
+  /** Asserts that the stream yields another token with the given term. */
+  private static void assertNextTerm(String expected, TokenStream ts)
+      throws Exception {
+    assertTrue(ts.incrementToken());
+    TermAttribute ta = (TermAttribute) ts.getAttribute(TermAttribute.class);
+    assertNotNull(ta);
+    assertEquals(expected, ta.term());
+  }
+
+}