Index: solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java (revision 940756) +++ solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.commongrams.CommonGramsFilter; import org.apache.solr.common.ResourceLoader; import org.apache.solr.util.plugin.ResourceLoaderAware; Index: solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java (revision 940756) +++ solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java (working copy) @@ -23,6 +23,8 @@ import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.commongrams.CommonGramsFilter; +import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter; import org.apache.solr.common.ResourceLoader; import org.apache.solr.util.plugin.ResourceLoaderAware; Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 940756) +++ lucene/contrib/CHANGES.txt (working copy) @@ -155,6 +155,12 @@ of AttributeSource.cloneAttributes() instances and the new copyTo() method. (Steven Rowe via Uwe Schindler) + * LUCENE-2413: Consolidated Solr analysis components into contrib/analyzers. + New features from Solr now available to Lucene users include: + - o.a.l.analysis.commongrams: Constructs n-grams for frequently occurring terms + and phrases. + (... in progress) + Build * LUCENE-2124: Moved the JDK-based collation support from contrib/collation Index: lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java =================================================================== --- lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (revision 0) +++ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (working copy) @@ -14,28 +14,29 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.analysis; +package org.apache.lucene.analysis.commongrams; import java.io.Reader; import java.io.StringReader; import java.util.Set; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** - * Tests CommonGramsQueryFilter + * Tests CommonGrams(Query)Filter */ -public class CommonGramsFilterTest extends BaseTokenTestCase { +public class CommonGramsFilterTest extends BaseTokenStreamTestCase { private static final String[] commonWords = { "s", "a", "b", "c", "d", "the", "of" }; public void testReset() throws Exception { final String input = "How the s a brown s cow d like A B thing?"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class); @@ -56,7 +57,7 @@ public void testQueryReset() throws Exception { final String input = "How the s a brown s cow d like A B thing?"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf); @@ -88,7 +89,7 @@ @Override public TokenStream tokenStream(String field, Reader in) { return new CommonGramsQueryFilter(new CommonGramsFilter( - new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords)); + new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords)); } }; @@ -157,7 +158,7 @@ @Override public TokenStream tokenStream(String field, Reader in) { return new CommonGramsFilter( - new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords); + new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords); } }; @@ -243,7 +244,7 @@ */ public void testCaseSensitive() throws Exception { final String input = "How The s a brown s cow d like A B thing?"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); Set common = CommonGramsFilter.makeCommonSet(commonWords); TokenFilter cgf = new CommonGramsFilter(wt, common, false); assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s", @@ -256,7 +257,7 @@ */ public void testLastWordisStopWord() throws Exception { final String input = "dog the"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "dog_the" }); @@ -267,7 +268,7 @@ */ public void testFirstWordisStopWord() throws Exception { final String input = "the dog"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the_dog" }); @@ -278,7 +279,7 @@ */ public void testOneWordQueryStopWord() throws Exception { final String input = "the"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the" }); @@ -289,7 +290,7 @@ */ public void testOneWordQuery() throws Exception { final String input = "monster"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "monster" }); @@ -300,7 +301,7 @@ */ public void TestFirstAndLastStopWord() throws Exception { final String input = "the of"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the_of" }); Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (revision 0) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (working copy) @@ -7,7 +7,7 @@ * See the License for the specific language governing permissions and limitations under the License. */ -package org.apache.solr.analysis; +package org.apache.lucene.analysis.commongrams; import java.io.IOException; import java.util.Arrays; Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java (revision 0) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java (working copy) @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.analysis; +package org.apache.lucene.analysis.commongrams; import java.io.IOException; @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import static org.apache.solr.analysis.CommonGramsFilter.GRAM_TYPE; +import static org.apache.lucene.analysis.commongrams.CommonGramsFilter.GRAM_TYPE; /** * Wrap a CommonGramsFilter optimizing phrase queries by only returning single Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html (revision 0) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html (revision 0) @@ -0,0 +1,22 @@ + + + + +Construct n-grams for frequently occurring terms and phrases. + + Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\commongrams\package.html ___________________________________________________________________ Added: svn:eol-style + native