args) {
+ super.init(args);
+ assureMatchVersion();
+ }
+
+ public TokenStream create(TokenStream input) {
+ return new EnglishPossessiveFilter(luceneMatchVersion, input); // wraps input; the method above calls assureMatchVersion(), presumably so luceneMatchVersion is set -- confirm
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilterFactory.java (working copy)
@@ -0,0 +1,33 @@
+package org.apache.lucene.analysis.en;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.KStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/** Factory for {@link KStemFilter}. */
+public class KStemFilterFactory extends TokenFilterFactory {
+
+  /** Returns a new {@link KStemFilter} wrapping {@code input}. */
+  @Override
+  public TokenFilter create(TokenStream input) {
+    return new KStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.en;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link PorterStemFilter}. {@link #create(TokenStream)} returns a new filter wrapping the given stream.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.PorterStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class PorterStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public PorterStemFilter create(TokenStream input) {
+    return new PorterStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.es;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.es.SpanishLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link SpanishLightStemFilter}. {@link #create(TokenStream)} returns a new filter wrapping the given stream.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.SpanishLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class SpanishLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new SpanishLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (working copy)
@@ -0,0 +1,50 @@
+package org.apache.lucene.analysis.fa;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.fa.PersianCharFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+
+/**
+ * Factory for {@link PersianCharFilter}. {@link #create(Reader)} returns a new filter wrapping the given reader.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class PersianCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
+  /** Returns this factory itself as its multi-term component. */
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+
+  @Override
+  public CharFilter create(Reader input) {
+    final CharFilter wrapped = new PersianCharFilter(input);
+    return wrapped;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.fa;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link PersianNormalizationFilter}. Also returns itself as its multi-term component.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.PersianNormalizationFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class PersianNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  @Override
+  public PersianNormalizationFilter create(TokenStream input) {
+    return new PersianNormalizationFilter(input);
+  }
+
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.fi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link FinnishLightStemFilter}. {@link #create(TokenStream)} returns a new filter wrapping the given stream.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.FinnishLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class FinnishLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new FinnishLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (working copy)
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.util.*;
+
+import java.io.IOException;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Factory for {@link ElisionFilter}. Reads the optional "articles" file and "ignoreCase" flag from its args.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ElisionFilterFactory"
+ *       articles="stopwordarticles.txt" ignoreCase="true"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  // Word set loaded from the "articles" file; null selects the ElisionFilter constructor that takes no article set.
+  private CharArraySet articles;
+
+  @Override
+  public void inform(ResourceLoader loader) {
+    String articlesFile = args.get("articles");
+    boolean ignoreCase = getBoolean("ignoreCase", false);
+    if (articlesFile != null) {
+      try {
+        articles = getWordSet(loader, articlesFile, ignoreCase);
+      } catch (IOException e) {
+        throw new InitializationException("IOException thrown while loading articles", e);
+      }
+    }
+  }
+
+  @Override
+  public ElisionFilter create(TokenStream input) {
+    assureMatchVersion();
+    return articles == null ? new ElisionFilter(luceneMatchVersion,input) :
+        new ElisionFilter(luceneMatchVersion,input,articles);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link FrenchLightStemFilter}. {@link #create(TokenStream)} returns a new filter wrapping the given stream.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ElisionFilterFactory"/&gt;
+ *     &lt;filter class="solr.FrenchLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class FrenchLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new FrenchLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link FrenchMinimalStemFilter}. {@link #create(TokenStream)} returns a new filter wrapping the given stream.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ElisionFilterFactory"/&gt;
+ *     &lt;filter class="solr.FrenchMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class FrenchMinimalStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new FrenchMinimalStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (working copy)
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.ga;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link IrishLowerCaseFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.IrishLowerCaseFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class IrishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  // Returning this factory as its own multi-term component 'mostly works', except for special cases, like most other filters.
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+
+  /** Returns a new {@link IrishLowerCaseFilter} wrapping {@code input}. */
+  @Override
+  public TokenStream create(TokenStream input) {
+    final TokenStream lowercased = new IrishLowerCaseFilter(input);
+    return lowercased;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.gl;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.gl.GalicianMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GalicianMinimalStemFilter}. {@link #create(TokenStream)} returns a new filter wrapping the given stream.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_glplural" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GalicianMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class GalicianMinimalStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new GalicianMinimalStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.gl;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.gl.GalicianStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GalicianStemFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * Example configuration: <pre>
+ * &lt;fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GalicianStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class GalicianStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new GalicianStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.hi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link HindiNormalizationFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * The factory returns itself as its multi-term component. Example configuration: <pre>
+ * &lt;fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.HindiNormalizationFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class HindiNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new HindiNormalizationFilter(input);
+  }
+
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.hi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hi.HindiStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link HindiStemFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * Example configuration: <pre>
+ * &lt;fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.HindiStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class HindiStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new HindiStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.hu;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link HungarianLightStemFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * Example configuration: <pre>
+ * &lt;fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.HungarianLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class HungarianLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new HungarianLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (working copy)
@@ -0,0 +1,121 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hunspell.HunspellDictionary;
+import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * TokenFilterFactory that creates instances of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}.
+ * Example config for British English including a custom dictionary, case insensitive matching:
+ * <pre>
+ * &lt;filter class="solr.HunspellStemFilterFactory"
+ *    dictionary="en_GB.dic,my_custom.dic"
+ *    affix="en_GB.aff"
+ *    ignoreCase="true" /&gt;</pre>
+ * Both parameters dictionary and affix are mandatory.
+ * <p>
+ * The parameter ignoreCase (true/false) controls whether matching is case sensitive or not. Default false.
+ * <p>
+ * The parameter strictAffixParsing (true/false) controls whether the affix parsing is strict or not. Default true.
+ * If strict, an error while reading an affix rule causes a ParseException; otherwise it is ignored.
+ * <p>
+ * Dictionaries for many languages are available through the OpenOffice project.
+ * <p>
+ * See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
+ */
+public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  private static final String PARAM_DICTIONARY = "dictionary";
+  private static final String PARAM_AFFIX = "affix";
+  private static final String PARAM_IGNORE_CASE = "ignoreCase";
+  private static final String PARAM_STRICT_AFFIX_PARSING = "strictAffixParsing";
+  private static final String TRUE = "true";
+  private static final String FALSE = "false";
+
+  private HunspellDictionary dictionary;
+  private boolean ignoreCase = false;
+
+  /**
+   * Loads the hunspell dictionary and affix files defined in the configuration
+   *
+   * @param loader ResourceLoader used to load the files
+   * @throws InitializationException if a parameter is missing or invalid, or the files cannot be loaded
+   */
+  @Override
+  public void inform(ResourceLoader loader) {
+    assureMatchVersion();
+    String dictionaryArg = args.get(PARAM_DICTIONARY);
+    if (dictionaryArg == null) {
+      throw new InitializationException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
+    }
+    String affixFile = args.get(PARAM_AFFIX);
+    if (affixFile == null) {
+      // Fail with a clear message instead of handing a null name to the resource loader.
+      throw new InitializationException("Parameter " + PARAM_AFFIX + " is mandatory.");
+    }
+    ignoreCase = strictBooleanParam(PARAM_IGNORE_CASE, false);
+    boolean strictAffixParsing = strictBooleanParam(PARAM_STRICT_AFFIX_PARSING, true);
+
+    InputStream affix = null;
+    List<InputStream> dictionaries = new ArrayList<InputStream>();
+    try {
+      for (String file : dictionaryArg.split(",")) {
+        dictionaries.add(loader.openResource(file));
+      }
+      affix = loader.openResource(affixFile);
+      this.dictionary = new HunspellDictionary(affix, dictionaries, luceneMatchVersion, ignoreCase, strictAffixParsing);
+    } catch (Exception e) {
+      throw new InitializationException("Unable to load hunspell data! [dictionary=" + dictionaryArg + ",affix=" + affixFile + "]", e);
+    } finally {
+      // Runs on success and failure alike, so the streams opened above are handed to IOUtils for closing.
+      IOUtils.closeWhileHandlingException(affix);
+      IOUtils.closeWhileHandlingException(dictionaries);
+    }
+  }
+
+  /** Reads an optional true/false parameter, ignoring case; any other value is rejected. */
+  private boolean strictBooleanParam(String name, boolean defaultValue) {
+    String value = args.get(name);
+    if (value == null) return defaultValue;
+    if (value.equalsIgnoreCase(TRUE)) return true;
+    if (value.equalsIgnoreCase(FALSE)) return false;
+    throw new InitializationException("Unknown value for " + name + ": " + value + ". Must be true or false");
+  }
+
+  /**
+   * Creates an instance of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter} that will filter the given
+   * TokenStream
+   *
+   * @param tokenStream TokenStream that will be filtered
+   * @return HunspellStemFilter that filters the TokenStream
+   */
+  @Override
+  public TokenStream create(TokenStream tokenStream) {
+    return new HunspellStemFilter(tokenStream, dictionary);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilterFactory.java (working copy)
@@ -0,0 +1,50 @@
+package org.apache.lucene.analysis.id;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.id.IndonesianStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link IndonesianStemFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * The {@code stemDerivational} setting is passed to the filter. Example configuration: <pre>
+ * &lt;fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class IndonesianStemFilterFactory extends TokenFilterFactory {
+  private boolean stemDerivational = true;
+
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    stemDerivational = getBoolean("stemDerivational", true);
+  }
+
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new IndonesianStemFilter(input, stemDerivational);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.in;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.in.IndicNormalizationFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link IndicNormalizationFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * The factory returns itself as its multi-term component. Example configuration: <pre>
+ * &lt;fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.IndicNormalizationFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class IndicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new IndicNormalizationFilter(input);
+  }
+
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.it;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.it.ItalianLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link ItalianLightStemFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * Example configuration: <pre>
+ * &lt;fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ItalianLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ItalianLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new ItalianLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.lv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.lv.LatvianStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link LatvianStemFilter}; {@link #create(TokenStream)} wraps the given stream in one. Example: <pre>
+ * &lt;fieldType name="text_lvstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.LatvianStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class LatvianStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new LatvianStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Factory for {@link ASCIIFoldingFilter}; {@link #create(TokenStream)} wraps the given stream in one.
+ * The factory returns itself as its multi-term component. Example configuration: <pre>
+ * &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.ASCIIFoldingFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  @Override
+  public ASCIIFoldingFilter create(TokenStream input) {
+    return new ASCIIFoldingFilter(input);
+  }
+
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (working copy)
@@ -0,0 +1,140 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.CapitalizationFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+/**
+ * Factory for {@link CapitalizationFilter}.
+ *
+ * The factory takes parameters:
+ * "onlyFirstWord" - should only the first word be capitalized, or all of the words?
+ * "keep" - a keep word list. Each word that should be kept separated by whitespace.
+ * "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive.
+ * "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list
+ * "okPrefix" - do not change word capitalization if a word begins with something in this list.
+ * for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
+ * "Mckinley"
+ * "minWordLength" - how long the word needs to be to get capitalization applied. If the
+ * minWordLength is 3, "and" > "And" but "or" stays "or"
+ * "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
+ * assumed to be correct.
+ *
+ *
+ * <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
+ * keep="java solr lucene" keepIgnoreCase="false"
+ * okPrefix="McK McD McA"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ *
+ * @since solr 1.3
+ */
+public class CapitalizationFilterFactory extends TokenFilterFactory {
+ public static final String KEEP = "keep";
+ public static final String KEEP_IGNORE_CASE = "keepIgnoreCase";
+ public static final String OK_PREFIX = "okPrefix";
+ public static final String MIN_WORD_LENGTH = "minWordLength";
+ public static final String MAX_WORD_COUNT = "maxWordCount";
+ public static final String MAX_TOKEN_LENGTH = "maxTokenLength";
+ public static final String ONLY_FIRST_WORD = "onlyFirstWord";
+ public static final String FORCE_FIRST_LETTER = "forceFirstLetter";
+
+ // keep-word set; remains null when no "keep" argument is supplied to init()
+ CharArraySet keep;
+
+ Collection<char[]> okPrefix = Collections.emptyList(); // for example: McK
+
+ int minWordLength = 0; // don't modify capitalization for words shorter than this
+ int maxWordCount = CapitalizationFilter.DEFAULT_MAX_WORD_COUNT;
+ int maxTokenLength = CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH;
+ boolean onlyFirstWord = true;
+ boolean forceFirstLetter = true; // make sure the first letter is capital even if it is in the keep list
+
+ @Override
+ public void init(Map<String,String> args) { // reads every optional argument; absent ones keep the field defaults
+ super.init(args);
+ assureMatchVersion();
+
+ String k = args.get(KEEP);
+ if (k != null) {
+ StringTokenizer st = new StringTokenizer(k); // keep words are whitespace separated
+ boolean ignoreCase = false;
+ String ignoreStr = args.get(KEEP_IGNORE_CASE);
+ if ("true".equalsIgnoreCase(ignoreStr)) {
+ ignoreCase = true;
+ }
+ keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
+ while (st.hasMoreTokens()) {
+ k = st.nextToken().trim();
+ keep.add(k.toCharArray());
+ }
+ }
+
+ k = args.get(OK_PREFIX);
+ if (k != null) {
+ okPrefix = new ArrayList<char[]>();
+ StringTokenizer st = new StringTokenizer(k); // prefixes are whitespace separated
+ while (st.hasMoreTokens()) {
+ okPrefix.add(st.nextToken().trim().toCharArray());
+ }
+ }
+
+ k = args.get(MIN_WORD_LENGTH);
+ if (k != null) {
+ minWordLength = Integer.parseInt(k); // NumberFormatException on a non-numeric value
+ }
+
+ k = args.get(MAX_WORD_COUNT);
+ if (k != null) {
+ maxWordCount = Integer.parseInt(k);
+ }
+
+ k = args.get(MAX_TOKEN_LENGTH);
+ if (k != null) {
+ maxTokenLength = Integer.parseInt(k);
+ }
+
+ k = args.get(ONLY_FIRST_WORD);
+ if (k != null) {
+ onlyFirstWord = Boolean.parseBoolean(k); // anything other than "true" (any case) yields false
+ }
+
+ k = args.get(FORCE_FIRST_LETTER);
+ if (k != null) {
+ forceFirstLetter = Boolean.parseBoolean(k);
+ }
+ }
+
+ public CapitalizationFilter create(TokenStream input) { // builds a filter from the settings captured by init()
+ return new CapitalizationFilter(input, onlyFirstWord, keep,
+ forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link HyphenatedWordsFilter}.
+ *
+ * <fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HyphenatedWordsFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class HyphenatedWordsFilterFactory extends TokenFilterFactory {
+ public HyphenatedWordsFilter create(TokenStream input) { // wraps the given stream in a new HyphenatedWordsFilter
+ return new HyphenatedWordsFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (working copy)
@@ -0,0 +1,96 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
+
+import java.util.Map;
+import java.util.Set;
+import java.io.IOException;
+
+/**
+ * Factory for {@link KeepWordFilter}.
+ *
+ * <fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false" enablePositionIncrements="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+
+ @Override
+ public void init(Map args) {
+ super.init(args);
+ assureMatchVersion();
+ }
+
+ public void inform(ResourceLoader loader) { // reads the "words", "ignoreCase" and "enablePositionIncrements" arguments
+ String wordFiles = args.get("words");
+ ignoreCase = getBoolean("ignoreCase", false);
+ enablePositionIncrements = getBoolean("enablePositionIncrements",false);
+
+ if (wordFiles != null) {
+ try {
+ words = getWordSet(loader, wordFiles, ignoreCase);
+ } catch (IOException e) {
+ throw new InitializationException("IOException thrown while loading words", e);
+ }
+ }
+ }
+
+ private CharArraySet words; // assigned by inform() (when "words" is configured) or by setWords()
+ private boolean ignoreCase;
+ private boolean enablePositionIncrements;
+
+ /**
+ * Set the keep word list.
+ * NOTE: if ignoreCase==true, the words are expected to be lowercase
+ */
+ public void setWords(Set words) {
+ this.words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
+ }
+
+ public void setIgnoreCase(boolean ignoreCase) {
+ if (words != null && this.ignoreCase != ignoreCase) { // rebuild the existing set only when the flag actually changes
+ words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
+ }
+ this.ignoreCase = ignoreCase;
+ }
+
+ public boolean isEnablePositionIncrements() {
+ return enablePositionIncrements;
+ }
+
+ public boolean isIgnoreCase() {
+ return ignoreCase;
+ }
+
+ public CharArraySet getWords() {
+ return words;
+ }
+
+ public TokenStream create(TokenStream input) {
+ // a null set (e.g. no "words" argument was configured) means no filtering: the input is returned unwrapped
+ return words == null ? input : new KeepWordFilter(enablePositionIncrements, input, words);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (working copy)
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Factory for {@link KeywordMarkerFilter}.
+ *
+ * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" ignoreCase="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class KeywordMarkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ public static final String PROTECTED_TOKENS = "protected"; // name of the argument listing the protected-word file(s)
+ private CharArraySet protectedWords; // stays null when the "protected" argument is not configured
+ private boolean ignoreCase;
+
+ public void inform(ResourceLoader loader) { // reads the "protected" and "ignoreCase" arguments and loads the word set
+ String wordFiles = args.get(PROTECTED_TOKENS);
+ ignoreCase = getBoolean("ignoreCase", false);
+ if (wordFiles != null) {
+ try {
+ protectedWords = getWordSet(loader, wordFiles, ignoreCase);
+ } catch (IOException e) {
+ throw new InitializationException("IOException thrown while loading protected words", e);
+ }
+ }
+ }
+
+ public boolean isIgnoreCase() {
+ return ignoreCase;
+ }
+
+ public TokenStream create(TokenStream input) { // with no protected-word set the input is returned unwrapped
+ return protectedWords == null ? input : new KeywordMarkerFilter(input, protectedWords);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (working copy)
@@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.LengthFilter;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.Map;
+
+/**
+ * Factory for {@link LengthFilter}.
+ *
+ * <fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.LengthFilterFactory" min="0" max="1" enablePositionIncrements="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class LengthFilterFactory extends TokenFilterFactory {
+ int min,max; // length bounds parsed from the mandatory "min" and "max" arguments
+ boolean enablePositionIncrements;
+ public static final String MIN_KEY = "min";
+ public static final String MAX_KEY = "max";
+
+ @Override
+ public void init(Map<String,String> args) { // typed map: args.get(...) must yield a String
+ super.init(args);
+ String minKey = args.get(MIN_KEY);
+ String maxKey = args.get(MAX_KEY);
+ if (minKey == null || maxKey == null) {
+ throw new InitializationException("Both " + MIN_KEY + " and " + MAX_KEY + " are mandatory");
+ }
+ min=Integer.parseInt(minKey); // NumberFormatException on a non-numeric value
+ max=Integer.parseInt(maxKey);
+ enablePositionIncrements = getBoolean("enablePositionIncrements",false);
+ }
+
+ public LengthFilter create(TokenStream input) { // builds a LengthFilter from the settings captured by init()
+ return new LengthFilter(enablePositionIncrements, input,min,max);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java (working copy)
@@ -0,0 +1,57 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link LimitTokenCountFilter}.
+ *
+ * <fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class LimitTokenCountFilterFactory extends TokenFilterFactory {
+
+ int maxTokenCount; // limit handed to every LimitTokenCountFilter this factory creates
+
+ @Override
+ public void init(Map<String,String> args) { // "maxTokenCount" is mandatory and must be an integer
+ super.init( args );
+ String maxTokenCountArg = args.get("maxTokenCount");
+ if (maxTokenCountArg == null) {
+ throw new InitializationException("maxTokenCount is mandatory.");
+ }
+ maxTokenCount = Integer.parseInt(maxTokenCountArg); // parse the value itself; it is not a key into args
+ }
+
+ @Override
+ public TokenStream create(TokenStream input) {
+ return new LimitTokenCountFilter(input, maxTokenCount);
+ }
+
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link RemoveDuplicatesTokenFilter}.
+ *
+ * <fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class RemoveDuplicatesTokenFilterFactory extends TokenFilterFactory {
+ public RemoveDuplicatesTokenFilter create(TokenStream input) { // wraps the given stream in a new RemoveDuplicatesTokenFilter
+ return new RemoveDuplicatesTokenFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (working copy)
@@ -0,0 +1,73 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+import org.apache.lucene.analysis.util.*;
+
+/**
+ * Factory for {@link StemmerOverrideFilter}.
+ *
+ * <fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class StemmerOverrideFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ private CharArrayMap<String> dictionary = null; // stays null when no "dictionary" argument is configured
+ private boolean ignoreCase;
+
+ public void inform(ResourceLoader loader) { // reads "dictionary" and "ignoreCase" and loads the override mappings
+ String dictionaryFiles = args.get("dictionary");
+ ignoreCase = getBoolean("ignoreCase", false);
+ if (dictionaryFiles != null) {
+ assureMatchVersion();
+ List<String> files = splitFileNames(dictionaryFiles);
+ try {
+ if (files.size() > 0) {
+ dictionary = new CharArrayMap<String>(luceneMatchVersion,
+ files.size() * 10, ignoreCase);
+ for (String file : files) {
+ List<String> list = loader.getLines(file.trim());
+ for (String line : list) {
+ String[] mapping = line.split("\t", 2); // text before the first tab is the key, the remainder the value
+ dictionary.put(mapping[0], mapping[1]); // NOTE(review): a line without a tab throws ArrayIndexOutOfBoundsException
+ }
+ }
+ }
+ } catch (IOException e) {
+ throw new InitializationException("IOException thrown while loading dictionary", e);
+ }
+ }
+ }
+
+ public boolean isIgnoreCase() {
+ return ignoreCase;
+ }
+
+ public TokenStream create(TokenStream input) { // with no dictionary loaded the input is returned unwrapped
+ return dictionary == null ? input : new StemmerOverrideFilter(luceneMatchVersion, input, dictionary);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java (working copy)
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.TrimFilter;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link TrimFilter}.
+ *
+ * <fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.NGramTokenizerFactory"/>
+ * <filter class="solr.TrimFilterFactory" updateOffsets="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ * @see TrimFilter
+ */
+public class TrimFilterFactory extends TokenFilterFactory {
+
+ protected boolean updateOffsets = false; // used when the "updateOffsets" argument is absent
+
+ @Override
+ public void init(Map<String,String> args) { // typed map: args.get(...) must yield a String
+ super.init( args );
+
+ String v = args.get( "updateOffsets" );
+ if( v != null ) {
+ try {
+ updateOffsets = Boolean.valueOf( v ); // any value other than "true" (ignoring case) yields false
+ }
+ catch( Exception ex ) { // NOTE(review): Boolean.valueOf does not throw, so this branch is effectively unreachable
+ throw new InitializationException("Error reading updateOffsets value. Must be true or false.", ex);
+ }
+ }
+ }
+
+ public TrimFilter create(TokenStream input) { // builds a TrimFilter with the configured updateOffsets flag
+ return new TrimFilter(input, updateOffsets);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java (working copy)
@@ -0,0 +1,198 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
+import org.apache.lucene.analysis.util.*;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.io.IOException;
+
+import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
+
+
+/**
+ * Factory for {@link WordDelimiterFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.WordDelimiterFilterFactory" protected="protectedword.txt"
+ *             preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
+ *             catenateWords="0" catenateNumbers="0" catenateAll="0"
+ *             generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"
+ *             types="wdfftypes.txt" /&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class WordDelimiterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  public static final String PROTECTED_TOKENS = "protected";
+  public static final String TYPES = "types";
+
+  /** Loads the protected-word set and the type-mapping files named by the arguments, if present. */
+  public void inform(ResourceLoader loader) {
+    String wordFiles = args.get(PROTECTED_TOKENS);
+    if (wordFiles != null) {
+      try {
+        protectedWords = getWordSet(loader, wordFiles, false);
+      } catch (IOException e) {
+        throw new InitializationException("IOException thrown while loading protected words", e);
+      }
+    }
+    String types = args.get(TYPES);
+    if (types != null) {
+      try {
+        List<String> wlist = new ArrayList<String>();
+        for (String file : splitFileNames(types)) {
+          wlist.addAll(loader.getLines(file.trim()));
+        }
+        typeTable = parseTypes(wlist);
+      } catch (IOException e) {
+        throw new InitializationException("IOException while loading types", e);
+      }
+    }
+  }
+
+  private CharArraySet protectedWords = null;
+  private int flags;
+  byte[] typeTable = null;
+
+  /** Sets one filter flag per integer argument; a non-zero value (or default) enables it. */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    if (getInt("generateWordParts", 1) != 0) {
+      flags |= GENERATE_WORD_PARTS;
+    }
+    if (getInt("generateNumberParts", 1) != 0) {
+      flags |= GENERATE_NUMBER_PARTS;
+    }
+    if (getInt("catenateWords", 0) != 0) {
+      flags |= CATENATE_WORDS;
+    }
+    if (getInt("catenateNumbers", 0) != 0) {
+      flags |= CATENATE_NUMBERS;
+    }
+    if (getInt("catenateAll", 0) != 0) {
+      flags |= CATENATE_ALL;
+    }
+    if (getInt("splitOnCaseChange", 1) != 0) {
+      flags |= SPLIT_ON_CASE_CHANGE;
+    }
+    if (getInt("splitOnNumerics", 1) != 0) {
+      flags |= SPLIT_ON_NUMERICS;
+    }
+    if (getInt("preserveOriginal", 0) != 0) {
+      flags |= PRESERVE_ORIGINAL;
+    }
+    if (getInt("stemEnglishPossessive", 1) != 0) {
+      flags |= STEM_ENGLISH_POSSESSIVE;
+    }
+  }
+
+  /** Creates the filter, falling back to the default type table when no custom one was loaded. */
+  public WordDelimiterFilter create(TokenStream input) {
+    return new WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
+        flags, protectedWords);
+  }
+
+  // source => type
+  private static final Pattern typePattern = Pattern.compile( "(.*)\\s*=>\\s*(.*)\\s*$" );
+
+  // parses a list of MappingCharFilter style rules into a custom byte[] type table
+  private byte[] parseTypes(List<String> rules) {
+    SortedMap<Character,Byte> typeMap = new TreeMap<Character,Byte>();
+    for (String rule : rules) {
+      Matcher m = typePattern.matcher(rule);
+      if (!m.find())
+        throw new InitializationException("Invalid Mapping Rule : [" + rule + "]");
+      String lhs = parseString(m.group(1).trim());
+      Byte rhs = parseType(m.group(2).trim());
+      if (lhs.length() != 1)
+        throw new InitializationException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
+      if (rhs == null)
+        throw new InitializationException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
+      typeMap.put(lhs.charAt(0), rhs);
+    }
+
+    // ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance;
+    // an empty rule list has no last key, so it gets the default-sized table without overrides
+    int maxKey = typeMap.isEmpty() ? -1 : typeMap.lastKey();
+    byte types[] = new byte[Math.max(maxKey + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.length)];
+    for (int i = 0; i < types.length; i++)
+      types[i] = WordDelimiterIterator.getType(i);
+    for (Map.Entry<Character,Byte> mapping : typeMap.entrySet())
+      types[mapping.getKey()] = mapping.getValue();
+    return types;
+  }
+
+  // maps the type name on a rule's right-hand side to its constant; null when the name is unknown
+  private Byte parseType(String s) {
+    if (s.equals("LOWER")) return LOWER;
+    if (s.equals("UPPER")) return UPPER;
+    if (s.equals("ALPHA")) return ALPHA;
+    if (s.equals("DIGIT")) return DIGIT;
+    if (s.equals("ALPHANUM")) return ALPHANUM;
+    if (s.equals("SUBWORD_DELIM")) return SUBWORD_DELIM;
+    return null;
+  }
+
+  // resolves backslash escapes (including backslash-u plus four hex digits) in a rule's left-hand side
+  private String parseString(String s){
+    int readPos = 0;
+    int len = s.length();
+    StringBuilder out = new StringBuilder(len); // per-call buffer, so long inputs cannot overflow it
+    while( readPos < len ){
+      char c = s.charAt( readPos++ );
+      if( c == '\\' ){
+        if( readPos >= len )
+          throw new InitializationException("Invalid escaped char in [" + s + "]");
+        c = s.charAt( readPos++ );
+        switch( c ) {
+          case '\\' : c = '\\'; break;
+          case 'n' : c = '\n'; break;
+          case 't' : c = '\t'; break;
+          case 'r' : c = '\r'; break;
+          case 'b' : c = '\b'; break;
+          case 'f' : c = '\f'; break;
+          case 'u' :
+            if( readPos + 3 >= len )
+              throw new InitializationException("Invalid escaped char in [" + s + "]");
+            try {
+              c = (char)Integer.parseInt( s.substring( readPos, readPos + 4 ), 16 );
+            } catch (NumberFormatException e) {
+              throw new InitializationException("Invalid escaped char in [" + s + "]", e);
+            }
+            readPos += 4;
+            break;
+        }
+      }
+      out.append( c );
+    }
+    return out.toString();
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java (working copy)
@@ -0,0 +1,63 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Creates new instances of {@link EdgeNGramTokenFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class EdgeNGramFilterFactory extends TokenFilterFactory {
+  private int maxGramSize = 0;
+  private int minGramSize = 0;
+  private String side;
+
+  /**
+   * Reads {@code maxGramSize}, {@code minGramSize} and {@code side}; each absent argument falls
+   * back to the {@link EdgeNGramTokenFilter} default (the {@code FRONT} label for the side).
+   * @throws NumberFormatException if a supplied size is not a valid integer
+   */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    String maxArg = args.get("maxGramSize");
+    maxGramSize = (maxArg == null) ? EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE : Integer.parseInt(maxArg);
+
+    String minArg = args.get("minGramSize");
+    minGramSize = (minArg == null) ? EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE : Integer.parseInt(minArg);
+
+    String sideArg = args.get("side");
+    side = (sideArg == null) ? EdgeNGramTokenFilter.Side.FRONT.getLabel() : sideArg;
+  }
+
+  /** Creates an {@link EdgeNGramTokenFilter} over {@code input} with the configured settings. */
+  public EdgeNGramTokenFilter create(TokenStream input) {
+    return new EdgeNGramTokenFilter(input, side, minGramSize, maxGramSize);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (working copy)
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Creates new instances of {@link EdgeNGramTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class EdgeNGramTokenizerFactory extends TokenizerFactory {
+  private int maxGramSize = 0;
+  private int minGramSize = 0;
+  private String side;
+
+  /**
+   * Reads {@code maxGramSize}, {@code minGramSize} and {@code side}; each absent argument falls
+   * back to the {@link EdgeNGramTokenizer} default (the {@code FRONT} label for the side).
+   * @throws NumberFormatException if a supplied size is not a valid integer
+   */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    String maxArg = args.get("maxGramSize");
+    maxGramSize = (maxArg == null) ? EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE : Integer.parseInt(maxArg);
+    String minArg = args.get("minGramSize");
+    minGramSize = (minArg == null) ? EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE : Integer.parseInt(minArg);
+    String sideArg = args.get("side");
+    side = (sideArg == null) ? EdgeNGramTokenizer.Side.FRONT.getLabel() : sideArg;
+  }
+
+  /** Creates an {@link EdgeNGramTokenizer} reading from {@code input} with the configured settings. */
+  public EdgeNGramTokenizer create(Reader input) {
+    return new EdgeNGramTokenizer(input, side, minGramSize, maxGramSize);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java (working copy)
@@ -0,0 +1,57 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ngram.NGramTokenFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link NGramTokenFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class NGramFilterFactory extends TokenFilterFactory {
+  private int maxGramSize = 0;
+  private int minGramSize = 0;
+
+  /**
+   * Reads {@code maxGramSize} and {@code minGramSize}; absent ones use the {@link NGramTokenFilter} defaults.
+   * @throws NumberFormatException if a supplied size is not a valid integer
+   */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    String maxArg = args.get("maxGramSize");
+    maxGramSize = (maxArg == null) ? NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE : Integer.parseInt(maxArg);
+    String minArg = args.get("minGramSize");
+    minGramSize = (minArg == null) ? NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE : Integer.parseInt(minArg);
+  }
+
+  /** Creates an {@link NGramTokenFilter} over {@code input} with the configured gram sizes. */
+  public NGramTokenFilter create(TokenStream input) {
+    return new NGramTokenFilter(input, minGramSize, maxGramSize);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java (working copy)
@@ -0,0 +1,56 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Factory for {@link NGramTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class NGramTokenizerFactory extends TokenizerFactory {
+  private int maxGramSize = 0;
+  private int minGramSize = 0;
+  /**
+   * Reads {@code maxGramSize} and {@code minGramSize} (parsed as integers); absent ones use the {@link NGramTokenizer} defaults.
+   */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    String maxArg = args.get("maxGramSize");
+    maxGramSize = (maxArg == null) ? NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE : Integer.parseInt(maxArg);
+    String minArg = args.get("minGramSize");
+    minGramSize = (minArg == null) ? NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE : Integer.parseInt(minArg);
+  }
+
+  /** Creates the {@link TokenStream} of n-grams from the given {@link Reader}. */
+  public NGramTokenizer create(Reader input) {
+    return new NGramTokenizer(input, minGramSize, maxGramSize);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.no;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link NorwegianLightStemFilter}; it reads no arguments of its own.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_nolgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.NorwegianLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class NorwegianLightStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new NorwegianLightStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.no;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link NorwegianMinimalStemFilter}; it reads no arguments of its own.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_nominstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.NorwegianMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class NorwegianMinimalStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new NorwegianMinimalStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (working copy)
@@ -0,0 +1,98 @@
+package org.apache.lucene.analysis.path;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
+import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+/**
+ * Factory for {@link PathHierarchyTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_path" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class PathHierarchyTokenizerFactory extends TokenizerFactory {
+
+  private char delimiter;
+  private char replacement;
+  private boolean reverse = false;
+  private int skip = PathHierarchyTokenizer.DEFAULT_SKIP;
+
+  /**
+   * Reads the optional tokenizer arguments:
+   * <ul>
+   *   <li>{@code delimiter}: a single character; defaults to
+   *       {@link PathHierarchyTokenizer#DEFAULT_DELIMITER}</li>
+   *   <li>{@code replace}: a single character; defaults to the delimiter</li>
+   *   <li>{@code reverse}: only the exact string {@code "true"} selects
+   *       {@link ReversePathHierarchyTokenizer}</li>
+   *   <li>{@code skip}: an integer; defaults to {@link PathHierarchyTokenizer#DEFAULT_SKIP}</li>
+   * </ul>
+   *
+   * @throws InitializationException if {@code delimiter} or {@code replace} is not exactly one character
+   * @throws NumberFormatException if {@code skip} is not a valid integer
+   */
+  @Override
+  public void init(Map<String,String> args){
+    super.init( args );
+
+    String v = args.get( "delimiter" );
+    delimiter = (v == null) ? PathHierarchyTokenizer.DEFAULT_DELIMITER : singleChar("delimiter", v);
+
+    v = args.get( "replace" );
+    replacement = (v == null) ? delimiter : singleChar("replace", v);
+
+    v = args.get( "reverse" );
+    if( v != null ){
+      reverse = "true".equals( v );
+    }
+
+    v = args.get( "skip" );
+    if( v != null ){
+      skip = Integer.parseInt( v );
+    }
+  }
+
+  // returns the only character of value, rejecting anything that is not exactly one char long
+  private static char singleChar(String name, String value) {
+    if (value.length() != 1) {
+      throw new InitializationException(name + " should be a char. \"" + value + "\" is invalid");
+    }
+    return value.charAt(0);
+  }
+
+  /** Creates the reverse or the forward path tokenizer, depending on the {@code reverse} argument. */
+  public Tokenizer create(Reader input) {
+    if( reverse ) {
+      return new ReversePathHierarchyTokenizer(input, delimiter, replacement, skip);
+    }
+    return new PathHierarchyTokenizer(input, delimiter, replacement, skip);
+  }
+}
+
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.java (working copy)
@@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+
+/**
+ * Factory for {@link PatternReplaceCharFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;charFilter class="solr.PatternReplaceCharFilterFactory"
+ *                    pattern="([^a-z])" replacement=""/&gt;
+ *     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ *
+ * @since Solr 3.1
+ */
+public class PatternReplaceCharFilterFactory extends CharFilterFactory {
+  private Pattern p;
+  private String replacement;
+
+  /** Resolves {@code pattern} through {@code getPattern}; {@code replacement} defaults to the empty string. */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init( args );
+    p = getPattern("pattern");
+    String configured = args.get( "replacement" );
+    replacement = (configured == null) ? "" : configured;
+    // TODO: throw exception if you set maxBlockChars or blockDelimiters ?
+  }
+
+  /** Wraps {@code input} in a {@link PatternReplaceCharFilter} using the configured pattern and replacement. */
+  public CharFilter create(Reader input) {
+    return new PatternReplaceCharFilter( p, replacement, input );
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java (working copy)
@@ -0,0 +1,72 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.Map;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * Factory for {@link PatternReplaceFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement=""
+ *             replace="all"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * @see PatternReplaceFilter
+ */
+public class PatternReplaceFilterFactory extends TokenFilterFactory {
+  Pattern p;
+  String replacement;
+  boolean all = true;
+
+  /**
+   * Reads "pattern", "replacement" and the optional "replace" mode: "all" (the default) or "first".
+   * @throws InitializationException if "replace" is set to anything other than "first" or "all"
+   */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    p = getPattern("pattern");
+    replacement = args.get("replacement");
+
+    String r = args.get("replace");
+    if ("all".equals(r)) {
+      all = true;
+    } else if ("first".equals(r)) {
+      all = false;
+    } else if (r != null) {
+      throw new InitializationException(
+          "Configuration Error: 'replace' must be 'first' or 'all' in " + getClass().getName());
+    }
+  }
+
+  /** Creates the filter from the pattern, replacement and replace-all flag read by init(). */
+  public PatternReplaceFilter create(TokenStream input) {
+    return new PatternReplaceFilter(input, p, replacement, all);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java (working copy)
@@ -0,0 +1,106 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.pattern.PatternTokenizer;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+/**
+ * Factory for {@link PatternTokenizer}.
+ * This tokenizer uses regex pattern matching to construct distinct tokens
+ * for the input stream. It takes two arguments: "pattern" and "group".
+ *
+ * <ul>
+ * <li>"pattern" is the regular expression.</li>
+ * <li>"group" says which group to extract into tokens.</li>
+ * </ul>
+ * <p>
+ * group=-1 (the default) is equivalent to "split". In this case, the tokens will
+ * be equivalent to the output from (without empty tokens):
+ * {@link String#split(java.lang.String)}
+ *
+ * <p>
+ * Using group &gt;= 0 selects the matching group as the token. For example, if you have:
+ * <pre>
+ * pattern = \'([^\']+)\'
+ * group = 0
+ * input = aaa 'bbb' 'ccc'
+ * </pre>
+ * the output will be two tokens: 'bbb' and 'ccc' (including the ' marks). With the same input
+ * but using group=1, the output would be: bbb and ccc (no ' marks)
+ *
+ * <p>NOTE: This Tokenizer does not output tokens that are of zero length.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * @see PatternTokenizer
+ * @since solr1.2
+ *
+ */
+public class PatternTokenizerFactory extends TokenizerFactory
+{
+  public static final String PATTERN = "pattern";
+  public static final String GROUP = "group";
+
+  protected Pattern pattern;
+  protected int group;
+
+  /**
+   * Reads the "pattern" argument and the optional integer "group" (default -1, i.e. split mode).
+   */
+  @Override
+  public void init(Map<String,String> args)
+  {
+    super.init(args);
+    pattern = getPattern( PATTERN );
+
+    group = -1;  // use 'split'
+    String g = args.get( GROUP );
+    if( g != null ) {
+      try {
+        group = Integer.parseInt( g );
+      }
+      catch( NumberFormatException ex ) {
+        throw new InitializationException("invalid group argument: " + g, ex);  // keep the cause
+      }
+    }
+  }
+
+  /**
+   * Split the input using configured pattern
+   */
+  public Tokenizer create(final Reader in) {
+    try {
+      return new PatternTokenizer(in, pattern, group);
+    } catch( IOException ex ) {
+      throw new InitializationException("IOException thrown creating PatternTokenizer instance", ex);
+    }
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterFactory.java (working copy)
@@ -0,0 +1,86 @@
+package org.apache.lucene.analysis.payloads;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
+import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.payloads.FloatEncoder;
+import org.apache.lucene.analysis.payloads.IntegerEncoder;
+import org.apache.lucene.analysis.payloads.IdentityEncoder;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.Map;
+
+/**
+ *
+ * Factory for {@link DelimitedPayloadTokenFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ *
+ */
+public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  public static final String ENCODER_ATTR = "encoder";
+  public static final String DELIMITER_ATTR = "delimiter";
+
+  private PayloadEncoder encoder;
+  private char delimiter = '|';
+
+  public DelimitedPayloadTokenFilter create(TokenStream input) {
+    return new DelimitedPayloadTokenFilter(input, delimiter, encoder);
+  }
+
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);  // nothing is parsed here; the arguments are interpreted in inform()
+  }
+
+  /** Resolves the mandatory "encoder" and the optional one-character "delimiter" arguments. */
+  public void inform(ResourceLoader loader) {
+    String encoderClass = args.get(ENCODER_ATTR);
+    if (encoderClass == null) {
+      throw new InitializationException("Parameter " + ENCODER_ATTR + " is mandatory");
+    }
+    if (encoderClass.equals("float")) {
+      encoder = new FloatEncoder();
+    } else if (encoderClass.equals("integer")) {
+      encoder = new IntegerEncoder();
+    } else if (encoderClass.equals("identity")) {
+      encoder = new IdentityEncoder();
+    } else {
+      encoder = loader.newInstance(encoderClass, PayloadEncoder.class);  // other values go to the loader
+    }
+
+    String delim = args.get(DELIMITER_ATTR);
+    if (delim != null) {
+      if (delim.length() != 1) {
+        throw new InitializationException("Delimiter must be one character only");
+      }
+      delimiter = delim.charAt(0);
+    }
+  }
+}
\ No newline at end of file
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (revision 1365483)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (working copy)
@@ -40,6 +40,9 @@
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input);
+ if (typeMatch == null) {
+ throw new IllegalArgumentException("typeMatch cannot be null");
+ }
//Need to encode the payload
thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterFactory.java (working copy)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.payloads;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import java.util.Map;
+
+/**
+ * Factory for {@link NumericPayloadTokenFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_numpayload" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class NumericPayloadTokenFilterFactory extends TokenFilterFactory {
+  private float payload;     // parsed from the "payload" argument
+  private String typeMatch;  // "typeMatch" argument; non-null once init() succeeds
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    String payloadArg = args.get("payload");
+    typeMatch = args.get("typeMatch");
+    if (payloadArg == null || typeMatch == null) {
+      throw new InitializationException("Both payload and typeMatch are required");
+    }
+    payload = Float.parseFloat(payloadArg);  // a malformed number propagates as NumberFormatException
+  }
+  public NumericPayloadTokenFilter create(TokenStream input) {
+    return new NumericPayloadTokenFilter(input,payload,typeMatch);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.payloads;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link TokenOffsetPayloadTokenFilter}; it reads no configuration arguments of its own.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.TokenOffsetPayloadTokenFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class TokenOffsetPayloadTokenFilterFactory extends TokenFilterFactory {
+ public TokenOffsetPayloadTokenFilter create(TokenStream input) {
+ return new TokenOffsetPayloadTokenFilter(input);
+ }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.payloads;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link TypeAsPayloadTokenFilter}; it reads no configuration arguments of its own.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.TypeAsPayloadTokenFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class TypeAsPayloadTokenFilterFactory extends TokenFilterFactory {
+ public TypeAsPayloadTokenFilter create(TokenStream input) {
+ return new TypeAsPayloadTokenFilter(input);
+ }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (working copy)
@@ -0,0 +1,55 @@
+package org.apache.lucene.analysis.position;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.position.PositionFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.Map;
+
+/**
+ * Factory for {@link PositionFilter}.
+ * Set the positionIncrement of all tokens to the "positionIncrement", except the first returned token which retains its
+ * original positionIncrement value. The default positionIncrement value is zero.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_position" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.PositionFilterFactory" positionIncrement="0"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ *
+ * @see org.apache.lucene.analysis.position.PositionFilter
+ * @since solr 1.4
+ */
+public class PositionFilterFactory extends TokenFilterFactory {
+  private int positionIncrement;
+
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    positionIncrement = getInt("positionIncrement", 0);  // 0 is passed as the default value
+  }
+
+  public PositionFilter create(TokenStream input) {
+    return new PositionFilter(input, positionIncrement);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.pt;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link PortugueseLightStemFilter}; it reads no configuration arguments of its own.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.PortugueseLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class PortugueseLightStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new PortugueseLightStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.pt;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link PortugueseMinimalStemFilter}; it reads no configuration arguments of its own.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.PortugueseMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class PortugueseMinimalStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new PortugueseMinimalStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.pt;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pt.PortugueseStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link PortugueseStemFilter}; it reads no configuration arguments of its own.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.PortugueseStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class PortugueseStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new PortugueseStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (working copy)
@@ -0,0 +1,43 @@
+package org.apache.lucene.analysis.reverse;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.reverse.ReverseStringFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link ReverseStringFilter}. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.ReverseStringFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * @since solr 1.4
+ */
+public class ReverseStringFilterFactory extends TokenFilterFactory {
+  @Override
+  public ReverseStringFilter create(TokenStream in) {
+    assureMatchVersion(); // luceneMatchVersion is handed to the filter below
+    return new ReverseStringFilter(luceneMatchVersion, in);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.ru;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ru.RussianLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link RussianLightStemFilter}. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.RussianLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class RussianLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new RussianLightStemFilter(input); // each call wraps input in a new filter
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (working copy)
@@ -0,0 +1,80 @@
+package org.apache.lucene.analysis.shingle;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.shingle.ShingleFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.Map;
+
+/**
+ * Factory for {@link ShingleFilter}. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2"
+ *             outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ShingleFilterFactory extends TokenFilterFactory {
+  private int minShingleSize;
+  private int maxShingleSize;
+  private boolean outputUnigrams;
+  private boolean outputUnigramsIfNoShingles;
+  private String tokenSeparator;
+
+  /** Reads the shingle settings from {@code args}; rejects sizes below 2 and min greater than max. */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    maxShingleSize = getInt("maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
+    if (maxShingleSize < 2) {
+      throw new InitializationException("Invalid maxShingleSize (" + maxShingleSize
+          + ") - must be at least 2");
+    }
+    minShingleSize = getInt("minShingleSize", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
+    if (minShingleSize < 2) {
+      throw new InitializationException("Invalid minShingleSize (" + minShingleSize
+          + ") - must be at least 2");
+    }
+    if (minShingleSize > maxShingleSize) {
+      throw new InitializationException("Invalid minShingleSize (" + minShingleSize
+          + ") - must be no greater than maxShingleSize ("
+          + maxShingleSize + ")");
+    }
+    outputUnigrams = getBoolean("outputUnigrams", true);
+    outputUnigramsIfNoShingles = getBoolean("outputUnigramsIfNoShingles", false);
+    tokenSeparator = args.containsKey("tokenSeparator")
+        ? args.get("tokenSeparator") : ShingleFilter.TOKEN_SEPARATOR;
+  }
+
+  /** Builds a {@link ShingleFilter} over {@code input} from the settings stored by {@link #init}. */
+  @Override
+  public ShingleFilter create(TokenStream input) {
+    ShingleFilter filter = new ShingleFilter(input, minShingleSize, maxShingleSize);
+    filter.setOutputUnigrams(outputUnigrams);
+    filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
+    filter.setTokenSeparator(tokenSeparator);
+    return filter;
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (working copy)
@@ -0,0 +1,91 @@
+package org.apache.lucene.analysis.snowball;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.util.*;
+import org.tartarus.snowball.SnowballProgram;
+
+/**
+ * Factory for {@link SnowballFilter}, with configurable language.
+ * <p>Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  /** Name of the optional argument that names the protected-word file(s). */
+  public static final String PROTECTED_TOKENS = "protected";
+
+  private String language = "English";
+  private Class<?> stemClass;
+  private CharArraySet protectedWords = null;
+
+  /** Loads the protected word set named by the {@link #PROTECTED_TOKENS} argument, if present. */
+  @Override
+  public void inform(ResourceLoader loader) {
+    String wordFiles = args.get(PROTECTED_TOKENS);
+    if (wordFiles != null) {
+      try {
+        protectedWords = getWordSet(loader, wordFiles, false);
+      } catch (IOException e) {
+        throw new InitializationException("IOException thrown while loading protected words", e);
+      }
+    }
+  }
+
+  /** Resolves the stemmer class for the "language" argument, keeping "English" when it is absent. */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    final String cfgLanguage = args.get("language");
+    if (cfgLanguage != null) language = cfgLanguage;
+    try {
+      stemClass = Class.forName("org.tartarus.snowball.ext." + language + "Stemmer");
+    } catch (ClassNotFoundException e) {
+      throw new InitializationException("Can't find class for stemmer language " + language, e);
+    }
+  }
+
+  /** Instantiates a stemmer and wraps {@code input}, marking protected words first when configured. */
+  @Override
+  public TokenFilter create(TokenStream input) {
+    SnowballProgram program;
+    try {
+      program = (SnowballProgram) stemClass.newInstance();
+    } catch (Exception e) {
+      throw new InitializationException("Error instantiating stemmer for language " + language + " from class " + stemClass, e);
+    }
+
+    if (protectedWords != null)
+      input = new KeywordMarkerFilter(input, protectedWords);
+    return new SnowballFilter(input, program);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.standard;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.standard.ClassicFilter;
+
+/**
+ * Factory for {@link ClassicFilter}. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.ClassicFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ClassicFilterFactory extends TokenFilterFactory {
+  /** Wraps {@code input} in a new {@link ClassicFilter}. */
+  @Override
+  public TokenFilter create(TokenStream input) {
+    return new ClassicFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (working copy)
@@ -0,0 +1,57 @@
+package org.apache.lucene.analysis.standard;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Factory for {@link ClassicTokenizer}. Reads the {@code maxTokenLength} argument, passing
+ * {@link StandardAnalyzer#DEFAULT_MAX_TOKEN_LENGTH} as the fallback. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ClassicTokenizerFactory extends TokenizerFactory {
+
+  private int maxTokenLength;
+
+  /** Checks the match version and stores the {@code maxTokenLength} setting from {@code args}. */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion();
+    maxTokenLength = getInt("maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+  }
+
+  /** Creates a {@link ClassicTokenizer} over {@code input} and applies the stored length limit. */
+  @Override
+  public Tokenizer create(Reader input) {
+    ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, input);
+    tokenizer.setMaxTokenLength(maxTokenLength);
+    return tokenizer;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.standard;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link StandardFilter}. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.StandardFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class StandardFilterFactory extends TokenFilterFactory {
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion(); // create() hands luceneMatchVersion to the filter
+  }
+
+  @Override
+  public StandardFilter create(TokenStream input) {
+    return new StandardFilter(luceneMatchVersion, input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (working copy)
@@ -0,0 +1,56 @@
+package org.apache.lucene.analysis.standard;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Factory for {@link StandardTokenizer}. Reads the {@code maxTokenLength} argument, passing
+ * {@link StandardAnalyzer#DEFAULT_MAX_TOKEN_LENGTH} as the fallback. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class StandardTokenizerFactory extends TokenizerFactory {
+
+  private int maxTokenLength;
+
+  /** Checks the match version and stores the {@code maxTokenLength} setting from {@code args}. */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion();
+    maxTokenLength = getInt("maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+  }
+
+  /** Creates a {@link StandardTokenizer} over {@code input} and applies the stored length limit. */
+  @Override
+  public StandardTokenizer create(Reader input) {
+    StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, input);
+    tokenizer.setMaxTokenLength(maxTokenLength);
+    return tokenizer;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (working copy)
@@ -0,0 +1,56 @@
+package org.apache.lucene.analysis.standard;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Factory for {@link UAX29URLEmailTokenizer}. Reads the {@code maxTokenLength} argument, passing
+ * {@link StandardAnalyzer#DEFAULT_MAX_TOKEN_LENGTH} as the fallback. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
+
+  private int maxTokenLength;
+
+  /** Checks the match version and stores the {@code maxTokenLength} setting from {@code args}. */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion();
+    maxTokenLength = getInt("maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+  }
+
+  /** Creates a {@link UAX29URLEmailTokenizer} over {@code input} and applies the stored length limit. */
+  @Override
+  public UAX29URLEmailTokenizer create(Reader input) {
+    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input);
+    tokenizer.setMaxTokenLength(maxTokenLength);
+    return tokenizer;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.sv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link SwedishLightStemFilter}. Example Solr field type:
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.SwedishLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class SwedishLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new SwedishLightStemFilter(input); // each call wraps input in a new filter
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (working copy)
@@ -0,0 +1,165 @@
+package org.apache.lucene.analysis.synonym;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.text.ParseException;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.synonym.SynonymFilter;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.synonym.SolrSynonymParser;
+import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
+import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.util.Version;
+
+/**
+ * Factory for {@link SynonymFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
+ *             format="solr" ignoreCase="false" expand="true"
+ *             tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ private SynonymMap map;
+ private boolean ignoreCase;
+
+ @Override
+ public TokenStream create(TokenStream input) {
+ // if the fst is null, it means there's actually no synonyms... just return the original stream
+ // as there is nothing to do here.
+ return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
+ }
+
+ @Override
+ public void inform(ResourceLoader loader) {
+ final boolean ignoreCase = getBoolean("ignoreCase", false);
+ this.ignoreCase = ignoreCase;
+
+ String tf = args.get("tokenizerFactory");
+
+ final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
+
+ Analyzer analyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader);
+ TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer;
+ return new TokenStreamComponents(tokenizer, stream);
+ }
+ };
+
+ String format = args.get("format");
+ try {
+ if (format == null || format.equals("solr")) {
+ // TODO: expose dedup as a parameter?
+ map = loadSolrSynonyms(loader, true, analyzer);
+ } else if (format.equals("wordnet")) {
+ map = loadWordnetSynonyms(loader, true, analyzer);
+ } else {
+ // TODO: somehow make this more pluggable
+ throw new InitializationException("Unrecognized synonyms format: " + format);
+ }
+ } catch (Exception e) {
+ throw new InitializationException("Exception thrown while loading synonyms", e);
+ }
+ }
+
+ /**
+ * Load synonyms from the solr format, "format=solr".
+ */
+ private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
+ final boolean expand = getBoolean("expand", true);
+ String synonyms = args.get("synonyms");
+ if (synonyms == null)
+ throw new InitializationException("Missing required argument 'synonyms'.");
+
+ CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+
+ SolrSynonymParser parser = new SolrSynonymParser(dedup, expand, analyzer);
+ File synonymFile = new File(synonyms);
+ if (synonymFile.exists()) {
+ decoder.reset();
+ parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
+ } else {
+ List<String> files = splitFileNames(synonyms);
+ for (String file : files) {
+ decoder.reset();
+ parser.add(new InputStreamReader(loader.openResource(file), decoder));
+ }
+ }
+ return parser.build();
+ }
+
+ /**
+ * Load synonyms from the wordnet format, "format=wordnet".
+ */
+ private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
+ final boolean expand = getBoolean("expand", true);
+ String synonyms = args.get("synonyms");
+ if (synonyms == null)
+ throw new InitializationException("Missing required argument 'synonyms'.");
+
+ CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+
+ WordnetSynonymParser parser = new WordnetSynonymParser(dedup, expand, analyzer);
+ File synonymFile = new File(synonyms);
+ if (synonymFile.exists()) {
+ decoder.reset();
+ parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
+ } else {
+ List<String> files = splitFileNames(synonyms);
+ for (String file : files) {
+ decoder.reset();
+ parser.add(new InputStreamReader(loader.openResource(file), decoder));
+ }
+ }
+ return parser.build();
+ }
+
+ // (there are no tests for this functionality)
+ private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname){
+ TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
+ tokFactory.setLuceneMatchVersion(luceneMatchVersion);
+ tokFactory.init(args);
+ if (tokFactory instanceof ResourceLoaderAware) {
+ ((ResourceLoaderAware) tokFactory).inform(loader);
+ }
+ return tokFactory;
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.th;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.th.ThaiWordFilter;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link ThaiWordFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.ThaiWordFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class ThaiWordFilterFactory extends TokenFilterFactory {
+ public ThaiWordFilter create(TokenStream input) {
+ assureMatchVersion();
+ return new ThaiWordFilter(luceneMatchVersion, input);
+ }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (working copy)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.tr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link TurkishLowerCaseFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class TurkishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+ public TokenStream create(TokenStream input) {
+ return new TurkishLowerCaseFilter(input);
+ }
+
+ @Override
+ public AbstractAnalysisFactory getMultiTermComponent() {
+ return this;
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (working copy)
@@ -0,0 +1,103 @@
+package org.apache.lucene.analysis.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Map;
+import java.util.LinkedHashMap;
+import java.util.Set;
+import java.util.ServiceConfigurationError;
+
+import org.apache.lucene.util.SPIClassIterator;
+
+/**
+ * Helper class for loading named SPIs from classpath (e.g. Tokenizers, TokenStreams).
+ * @lucene.internal
+ */
+public final class AnalysisSPILoader<S extends AbstractAnalysisFactory> {
+
+ private final Map<String,Class<? extends S>> services;
+ private final Class<S> clazz;
+
+ public AnalysisSPILoader(Class<S> clazz) {
+ this(clazz, new String[] { clazz.getSimpleName() });
+ }
+
+ public AnalysisSPILoader(Class<S> clazz, ClassLoader loader) {
+ this(clazz, new String[] { clazz.getSimpleName() }, loader);
+ }
+
+ public AnalysisSPILoader(Class<S> clazz, String[] suffixes) {
+ this(clazz, suffixes, Thread.currentThread().getContextClassLoader());
+ }
+
+ public AnalysisSPILoader(Class<S> clazz, String[] suffixes, ClassLoader classloader) {
+ this.clazz = clazz;
+ final SPIClassIterator<S> loader = SPIClassIterator.get(clazz, classloader);
+ final LinkedHashMap<String,Class<? extends S>> services = new LinkedHashMap<String,Class<? extends S>>();
+ while (loader.hasNext()) {
+ final Class<? extends S> service = loader.next();
+ final String clazzName = service.getSimpleName();
+ String name = null;
+ for (String suffix : suffixes) {
+ if (clazzName.endsWith(suffix)) {
+ name = clazzName.substring(0, clazzName.length() - suffix.length()).toLowerCase(Locale.ROOT);
+ break;
+ }
+ }
+ if (name == null) {
+ throw new ServiceConfigurationError("The class name " + service.getName() +
+ " has wrong suffix, allowed are: " + Arrays.toString(suffixes));
+ }
+ // only add the first one for each name, later services will be ignored
+ // this allows to place services before others in classpath to make
+ // them used instead of others
+ if (!services.containsKey(name)) {
+ services.put(name, service);
+ }
+ }
+ this.services = Collections.unmodifiableMap(services);
+ }
+
+ public S newInstance(String name) {
+ final Class<? extends S> service = lookupClass(name);
+ try {
+ return service.newInstance();
+ } catch (Exception e) {
+ throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name '"+name+"' cannot be instantiated. " +
+ "This is likely due to a misconfiguration of the java class '" + service.getName() + "': ", e);
+ }
+ }
+
+ public Class<? extends S> lookupClass(String name) {
+ final Class<? extends S> service = services.get(name.toLowerCase(Locale.ROOT));
+ if (service != null) {
+ return service;
+ } else {
+ throw new IllegalArgumentException("A SPI class of type "+clazz.getName()+" with name '"+name+"' does not exist. "+
+ "You need to add the corresponding JAR file supporting this SPI to your classpath. "+
+ "The current classpath supports the following names: "+availableServices());
+ }
+ }
+
+ public Set<String> availableServices() {
+ return services.keySet();
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (revision 1365483)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.io.Reader;
+import java.util.Set;
import org.apache.lucene.analysis.CharFilter;
@@ -27,5 +28,32 @@
*/
public abstract class CharFilterFactory extends AbstractAnalysisFactory {
- public abstract CharFilter create(Reader input);
+ private static final AnalysisSPILoader<CharFilterFactory> loader =
+ getSPILoader(Thread.currentThread().getContextClassLoader());
+
+ /**
+ * Used by e.g. Apache Solr to get a correctly configured instance
+ * of {@link AnalysisSPILoader} from Solr's classpath.
+ * @lucene.internal
+ */
+ public static AnalysisSPILoader<CharFilterFactory> getSPILoader(ClassLoader classloader) {
+ return new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class, classloader);
+ }
+
+ /** looks up a charfilter by name from context classpath */
+ public static CharFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a charfilter class by name from context classpath */
+ public static Class<? extends CharFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available charfilter names */
+ public static Set<String> availableCharFilters() {
+ return loader.availableServices();
+ }
+
+ public abstract Reader create(Reader input);
}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java (revision 1365483)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import java.util.Set;
+
import org.apache.lucene.analysis.TokenStream;
/**
@@ -25,6 +27,34 @@
*/
public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenFilterFactory> loader =
+ getSPILoader(Thread.currentThread().getContextClassLoader());
+
+ /**
+ * Used by e.g. Apache Solr to get a correctly configured instance
+ * of {@link AnalysisSPILoader} from Solr's classpath.
+ * @lucene.internal
+ */
+ public static AnalysisSPILoader<TokenFilterFactory> getSPILoader(ClassLoader classloader) {
+ return new AnalysisSPILoader<TokenFilterFactory>(TokenFilterFactory.class,
+ new String[] { "TokenFilterFactory", "FilterFactory" }, classloader);
+ }
+
+ /** looks up a tokenfilter by name from context classpath */
+ public static TokenFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenfilter class by name from context classpath */
+ public static Class<? extends TokenFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenfilter names from context classpath */
+ public static Set<String> availableTokenFilters() {
+ return loader.availableServices();
+ }
+
/** Transform the specified input TokenStream */
public abstract TokenStream create(TokenStream input);
}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (revision 1365483)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (working copy)
@@ -20,6 +20,7 @@
import org.apache.lucene.analysis.Tokenizer;
import java.io.Reader;
+import java.util.Set;
/**
* Abstract parent class for analysis factories that create {@link Tokenizer}
@@ -27,6 +28,33 @@
*/
public abstract class TokenizerFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenizerFactory> loader =
+ getSPILoader(Thread.currentThread().getContextClassLoader());
+
+ /**
+ * Used by e.g. Apache Solr to get a correctly configured instance
+ * of {@link AnalysisSPILoader} from Solr's classpath.
+ * @lucene.internal
+ */
+ public static AnalysisSPILoader<TokenizerFactory> getSPILoader(ClassLoader classloader) {
+ return new AnalysisSPILoader<TokenizerFactory>(TokenizerFactory.class, classloader);
+ }
+
+ /** looks up a tokenizer by name from context classpath */
+ public static TokenizerFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenizer class by name from context classpath */
+ public static Class<? extends TokenizerFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenizer names from context classpath */
+ public static Set<String> availableTokenizers() {
+ return loader.availableServices();
+ }
+
/** Creates a TokenStream of the specified input */
public abstract Tokenizer create(Reader input);
}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.wikipedia;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
+
+/**
+ * Factory for {@link WikipediaTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WikipediaTokenizerFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class WikipediaTokenizerFactory extends TokenizerFactory {
+ // TODO: add support for WikipediaTokenizer's advanced options.
+ public Tokenizer create(Reader input) {
+ return new WikipediaTokenizer(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory (revision 0)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory (working copy)
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory
+org.apache.lucene.analysis.charfilter.MappingCharFilterFactory
+org.apache.lucene.analysis.fa.PersianCharFilterFactory
+org.apache.lucene.analysis.pattern.PatternReplaceCharFilterFactory
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (revision 0)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (working copy)
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory
+org.apache.lucene.analysis.ar.ArabicStemFilterFactory
+org.apache.lucene.analysis.bg.BulgarianStemFilterFactory
+org.apache.lucene.analysis.br.BrazilianStemFilterFactory
+org.apache.lucene.analysis.cjk.CJKBigramFilterFactory
+org.apache.lucene.analysis.cjk.CJKWidthFilterFactory
+org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory
+org.apache.lucene.analysis.commongrams.CommonGramsQueryFilterFactory
+org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilterFactory
+org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilterFactory
+org.apache.lucene.analysis.core.LowerCaseFilterFactory
+org.apache.lucene.analysis.core.StopFilterFactory
+org.apache.lucene.analysis.core.TypeTokenFilterFactory
+org.apache.lucene.analysis.cz.CzechStemFilterFactory
+org.apache.lucene.analysis.de.GermanLightStemFilterFactory
+org.apache.lucene.analysis.de.GermanMinimalStemFilterFactory
+org.apache.lucene.analysis.de.GermanNormalizationFilterFactory
+org.apache.lucene.analysis.de.GermanStemFilterFactory
+org.apache.lucene.analysis.el.GreekLowerCaseFilterFactory
+org.apache.lucene.analysis.el.GreekStemFilterFactory
+org.apache.lucene.analysis.en.EnglishMinimalStemFilterFactory
+org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory
+org.apache.lucene.analysis.en.KStemFilterFactory
+org.apache.lucene.analysis.en.PorterStemFilterFactory
+org.apache.lucene.analysis.es.SpanishLightStemFilterFactory
+org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory
+org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory
+org.apache.lucene.analysis.fr.ElisionFilterFactory
+org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory
+org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory
+org.apache.lucene.analysis.ga.IrishLowerCaseFilterFactory
+org.apache.lucene.analysis.gl.GalicianMinimalStemFilterFactory
+org.apache.lucene.analysis.gl.GalicianStemFilterFactory
+org.apache.lucene.analysis.hi.HindiNormalizationFilterFactory
+org.apache.lucene.analysis.hi.HindiStemFilterFactory
+org.apache.lucene.analysis.hu.HungarianLightStemFilterFactory
+org.apache.lucene.analysis.hunspell.HunspellStemFilterFactory
+org.apache.lucene.analysis.id.IndonesianStemFilterFactory
+org.apache.lucene.analysis.in.IndicNormalizationFilterFactory
+org.apache.lucene.analysis.it.ItalianLightStemFilterFactory
+org.apache.lucene.analysis.lv.LatvianStemFilterFactory
+org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory
+org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
+org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory
+org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory
+org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilterFactory
+org.apache.lucene.analysis.miscellaneous.LengthFilterFactory
+org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory
+org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory
+org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory
+org.apache.lucene.analysis.miscellaneous.TrimFilterFactory
+org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory
+org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory
+org.apache.lucene.analysis.ngram.NGramFilterFactory
+org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory
+org.apache.lucene.analysis.no.NorwegianMinimalStemFilterFactory
+org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory
+org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory
+org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterFactory
+org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory
+org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory
+org.apache.lucene.analysis.position.PositionFilterFactory
+org.apache.lucene.analysis.pt.PortugueseLightStemFilterFactory
+org.apache.lucene.analysis.pt.PortugueseMinimalStemFilterFactory
+org.apache.lucene.analysis.pt.PortugueseStemFilterFactory
+org.apache.lucene.analysis.reverse.ReverseStringFilterFactory
+org.apache.lucene.analysis.ru.RussianLightStemFilterFactory
+org.apache.lucene.analysis.shingle.ShingleFilterFactory
+org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory
+org.apache.lucene.analysis.standard.ClassicFilterFactory
+org.apache.lucene.analysis.standard.StandardFilterFactory
+org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory
+org.apache.lucene.analysis.synonym.SynonymFilterFactory
+org.apache.lucene.analysis.th.ThaiWordFilterFactory
+org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (revision 0)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (working copy)
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.analysis.core.KeywordTokenizerFactory
+org.apache.lucene.analysis.core.LetterTokenizerFactory
+org.apache.lucene.analysis.core.LowerCaseTokenizerFactory
+org.apache.lucene.analysis.core.WhitespaceTokenizerFactory
+org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory
+org.apache.lucene.analysis.ngram.NGramTokenizerFactory
+org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory
+org.apache.lucene.analysis.pattern.PatternTokenizerFactory
+org.apache.lucene.analysis.standard.ClassicTokenizerFactory
+org.apache.lucene.analysis.standard.StandardTokenizerFactory
+org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory
+org.apache.lucene.analysis.wikipedia.WikipediaTokenizerFactory
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java (working copy)
@@ -0,0 +1,85 @@
+package org.apache.lucene.analysis.ar;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.fa.PersianCharFilterFactory;
+import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
+
+/**
+ * Simple tests to ensure the Arabic filter factories, and the Persian char filter factory, are working.
+ */
+public class TestArabicFilters extends BaseTokenStreamTestCase {
+
+  /**
+   * Test ArabicNormalizationFilterFactory: the expected tokens drop the fatha and fold hamza-alef to bare alef.
+   */
+  public void testNormalizer() throws Exception {
+    Reader reader = new StringReader("الذين مَلكت أيمانكم");
+    StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    ArabicNormalizationFilterFactory filterFactory = new ArabicNormalizationFilterFactory();
+    filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    factory.init(args);
+    filterFactory.init(args);
+    Tokenizer tokenizer = factory.create(reader);
+    TokenStream stream = filterFactory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] {"الذين", "ملكت", "ايمانكم"});
+  }
+
+  /**
+   * Test ArabicStemFilterFactory on normalized input: the first expected token has lost its leading "al-" prefix.
+   */
+  public void testStemmer() throws Exception {
+    Reader reader = new StringReader("الذين مَلكت أيمانكم");
+    StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    ArabicNormalizationFilterFactory normFactory = new ArabicNormalizationFilterFactory();
+    normFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    ArabicStemFilterFactory stemFactory = new ArabicStemFilterFactory(); // used as constructed: no match version, no init()
+    Map<String,String> args = Collections.emptyMap();
+    factory.init(args);
+    normFactory.init(args);
+    Tokenizer tokenizer = factory.create(reader);
+    TokenStream stream = normFactory.create(tokenizer);
+    stream = stemFactory.create(stream);
+    assertTokenStreamContents(stream, new String[] {"ذين", "ملكت", "ايمانكم"});
+  }
+
+  /**
+   * Test PersianCharFilterFactory: the expected tokens require a token break where the input has a zero-width non-joiner.
+   */
+  public void testPersianCharFilter() throws Exception {
+    // the zero-width non-joiner is spelled as an escape so the invisible character cannot get lost
+    Reader reader = new StringReader("می" + "\u200C" + "خورد");
+    PersianCharFilterFactory charfilterFactory = new PersianCharFilterFactory();
+    StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
+    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    tokenizerFactory.init(Collections.<String,String>emptyMap());
+    TokenStream stream = tokenizerFactory.create(charfilterFactory.create(reader));
+    assertTokenStreamContents(stream, new String[] { "می", "خورد" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemFilterFactory.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.bg;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Sanity check that BulgarianStemFilterFactory produces a working stemming filter.
+ */
+public class TestBulgarianStemFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * The one-word input must come out with its trailing vowel stripped.
+   */
+  public void testStemming() throws Exception {
+    BulgarianStemFilterFactory stemmerFactory = new BulgarianStemFilterFactory();
+    Reader input = new StringReader("компютри");
+    Tokenizer words = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    String[] expected = { "компютр" };
+    assertTokenStreamContents(stemmerFactory.create(words), expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemFilterFactory.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.br;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Sanity check that BrazilianStemFilterFactory produces a working filter.
+ */
+public class TestBrazilianStemFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * The expected token is lowercased, unaccented and shortened relative to the input.
+   */
+  public void testStemming() throws Exception {
+    final String input = "Brasília";
+    final String[] expected = { "brasil" };
+    Tokenizer source = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new BrazilianStemFilterFactory().create(source);
+    assertTokenStreamContents(stemmed, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilterFactory.java (working copy)
@@ -0,0 +1,127 @@
+package org.apache.lucene.analysis.charfilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.*;
+
+/**
+ * Tests HTMLStripCharFilterFactory, chiefly its "escapedTags" argument: the cases below expect
+ * listed tags to survive as text and all other tags to be stripped with offsets corrected.
+ */
+public class TestHTMLStripCharFilterFactory extends BaseTokenStreamTestCase {
+
+  // Markup-free input: offsets are untouched even though escapedTags is set.
+  public void testNothingChanged() throws IOException {
+    //                             11111111112
+    //                   012345678901234567890
+    final String text = "this is only a test.";
+    HTMLStripCharFilterFactory factory = new HTMLStripCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("escapedTags", "a, Title");
+    factory.init(args);
+    CharFilter cs = factory.create(new StringReader(text));
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[] { "this", "is", "only", "a", "test." },
+        new int[] {  0,  5,  8, 13, 15 },
+        new int[] {  4,  7, 12, 14, 20 });
+  }
+
+  // No escapedTags argument: every tag is stripped; offsets still point into the marked-up text.
+  public void testNoEscapedTags() throws IOException {
+    //                             11111111112222222222333333333344
+    //                   012345678901234567890123456789012345678901
+    final String text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+    HTMLStripCharFilterFactory factory = new HTMLStripCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    factory.init(args);
+    CharFilter cs = factory.create(new StringReader(text));
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[] { "this", "is", "only", "a", "test." },
+        new int[] {  3, 12, 18, 27, 32 },
+        new int[] { 11, 14, 26, 28, 41 });
+  }
+
+  // "U i" (whitespace-separated, case differs from the markup): <u> and <I> are kept, <b> is stripped.
+  public void testEscapedTags() throws IOException {
+    //                             11111111112222222222333333333344
+    //                   012345678901234567890123456789012345678901
+    final String text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+    HTMLStripCharFilterFactory factory = new HTMLStripCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("escapedTags", "U i");
+    factory.init(args);
+    CharFilter cs = factory.create(new StringReader(text));
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[] { "<u>this</u>", "is", "only", "a", "<I>test</I>." },
+        new int[] {  0, 12, 18, 27, 29 },
+        new int[] { 11, 14, 26, 28, 41 });
+  }
+
+  // Only separators in escapedTags: expected to behave as if no tag were listed.
+  public void testSeparatorOnlyEscapedTags() throws IOException {
+    //                             11111111112222222222333333333344
+    //                   012345678901234567890123456789012345678901
+    final String text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+    HTMLStripCharFilterFactory factory = new HTMLStripCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("escapedTags", ",, , ");
+    factory.init(args);
+    CharFilter cs = factory.create(new StringReader(text));
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[] { "this", "is", "only", "a", "test." },
+        new int[] {  3, 12, 18, 27, 32 },
+        new int[] { 11, 14, 26, 28, 41 });
+  }
+
+  // Empty escapedTags value: expected to behave as if no tag were listed.
+  public void testEmptyEscapedTags() throws IOException {
+    //                             11111111112222222222333333333344
+    //                   012345678901234567890123456789012345678901
+    final String text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+    HTMLStripCharFilterFactory factory = new HTMLStripCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("escapedTags", "");
+    factory.init(args);
+    CharFilter cs = factory.create(new StringReader(text));
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[] { "this", "is", "only", "a", "test." },
+        new int[] {  3, 12, 18, 27, 32 },
+        new int[] { 11, 14, 26, 28, 41 });
+  }
+
+  // One tag name surrounded by separators and whitespace: only <b> is kept.
+  public void testSingleEscapedTag() throws IOException {
+    //                             11111111112222222222333333333344
+    //                   012345678901234567890123456789012345678901
+    final String text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+    HTMLStripCharFilterFactory factory = new HTMLStripCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("escapedTags", ", B\r\n\t");
+    factory.init(args);
+    CharFilter cs = factory.create(new StringReader(text));
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[] { "this", "is", "<b>only</b>", "a", "test." },
+        new int[] {  3, 12, 15, 27, 32 },
+        new int[] { 11, 14, 26, 28, 41 });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestHTMLStripCharFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilterFactory.java (working copy)
@@ -0,0 +1,53 @@
+package org.apache.lucene.analysis.charfilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestMappingCharFilterFactory extends LuceneTestCase {
+  /** Exercises parseString() on valid escape sequences and on three malformed ones. */
+  public void testParseString() throws Exception {
+    final MappingCharFilterFactory factory = new MappingCharFilterFactory();
+    final String mismatch = "unexpected escaped characters";
+
+    // a backslash with nothing after it must be rejected
+    try {
+      factory.parseString("\\");
+      fail("escape character cannot be alone.");
+    } catch (InitializationException expected) {}
+
+    // single-character escapes and four-digit unicode escapes decode to the characters they name
+    assertEquals(mismatch,
+        "\\\"\n\t\r\b\f", factory.parseString("\\\\\\\"\\n\\t\\r\\b\\f"));
+    assertEquals(mismatch, "A", factory.parseString("\\u0041"));
+    assertEquals(mismatch, "AB", factory.parseString("\\u0041\\u0042"));
+
+    // a unicode escape cut short after three hex digits must be rejected
+    try {
+      factory.parseString("\\u000");
+      fail("invalid length check.");
+    } catch (InitializationException expected) {}
+
+    // a non-hex digit inside a unicode escape surfaces as NumberFormatException
+    try {
+      factory.parseString("\\u123x");
+      fail("invalid hex number check.");
+    } catch (NumberFormatException expected) {}
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java (working copy)
@@ -0,0 +1,55 @@
+package org.apache.lucene.analysis.cjk;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+/**
+ * Verifies CJKBigramFilterFactory output with default arguments and with hiragana bigramming switched off.
+ */
+public class TestCJKBigramFilterFactory extends BaseTokenStreamTestCase {
+  public void testDefaults() throws Exception {
+    CJKBigramFilterFactory bigrams = new CJKBigramFilterFactory();
+    bigrams.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map noArgs = Collections.emptyMap();
+    bigrams.init(noArgs);
+    Reader sentence = new StringReader("多くの学生が試験に落ちた。");
+    String[] expected = { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" };
+    TokenStream tokens = bigrams.create(new StandardTokenizer(TEST_VERSION_CURRENT, sentence));
+    assertTokenStreamContents(tokens, expected);
+  }
+
+  public void testHanOnly() throws Exception {
+    Map hanOnly = new HashMap();
+    hanOnly.put("hiragana", "false"); // expected tokens: hiragana stays unigrams, adjacent Han still pair up
+    CJKBigramFilterFactory bigrams = new CJKBigramFilterFactory();
+    bigrams.init(hanOnly);
+    Reader sentence = new StringReader("多くの学生が試験に落ちた。");
+    String[] expected = { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" };
+    TokenStream tokens = bigrams.create(new StandardTokenizer(TEST_VERSION_CURRENT, sentence));
+    assertTokenStreamContents(tokens, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.cjk;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Simple test to ensure the CJKWidthFilterFactory builds a filter that folds fullwidth ASCII variants.
+ */
+public class TestCJKWidthFilterFactory extends BaseTokenStreamTestCase {
+  public void test() throws Exception {
+    // fullwidth "Test 1234", escaped: with plain ASCII input the test would pass even if nothing were folded
+    Reader reader = new StringReader("\uFF34\uFF45\uFF53\uFF54 \uFF11\uFF12\uFF13\uFF14");
+    TokenStream stream = new CJKWidthFilterFactory().create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stream, new String[] { "Test", "1234" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java (working copy)
@@ -0,0 +1,107 @@
+package org.apache.lucene.analysis.commongrams;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.TestStopFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Map;
+import java.util.HashMap;
+
+/**
+ * Tests pretty much copied from StopFilterFactoryTest. The word files are the ones used by
+ * the stop filter tests, loaded through a ResourceLoader anchored at TestStopFilter.
+ * TODO: consider creating separate test files so this won't break if stop filter test files change.
+ */
+public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
+
+  public void testInform() throws Exception {
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    assertNotNull("loader is null and it shouldn't be", loader);
+    CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    // a single word file, matched case-insensitively
+    args.put("words", "stop-1.txt");
+    args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    CharArraySet words = factory.getCommonWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertEquals("number of common words from stop-1.txt", 2, words.size());
+    assertTrue("ignoreCase should be enabled", factory.isIgnoreCase());
+
+    // a comma-separated list of word files; args is reused, so ignoreCase is still set
+    factory = new CommonGramsFilterFactory();
+    args.put("words", "stop-1.txt, stop-2.txt");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    words = factory.getCommonWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertEquals("number of common words from stop-1.txt + stop-2.txt", 4, words.size());
+    assertTrue("ignoreCase should be enabled", factory.isIgnoreCase());
+
+    // a word file declared as snowball format
+    factory = new CommonGramsFilterFactory();
+    args.put("words", "stop-snowball.txt");
+    args.put("format", "snowball");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    words = factory.getCommonWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertEquals(8, words.size());
+    assertTrue(words.contains("he"));
+    assertTrue(words.contains("him"));
+    assertTrue(words.contains("his"));
+    assertTrue(words.contains("himself"));
+    assertTrue(words.contains("she"));
+    assertTrue(words.contains("her"));
+    assertTrue(words.contains("hers"));
+    assertTrue(words.contains("herself"));
+  }
+
+  /**
+   * If no words are provided, then a set of english default stopwords is used.
+   */
+  public void testDefaults() throws Exception {
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    assertNotNull("loader is null and it shouldn't be", loader);
+    CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    factory.init(args);
+    factory.inform(loader);
+    CharArraySet words = factory.getCommonWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertTrue(words.contains("the"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream,
+        new String[] { "testing", "testing_the", "the", "the_factory", "factory" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java (working copy)
@@ -0,0 +1,107 @@
+package org.apache.lucene.analysis.commongrams;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.TestStopFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Map;
+import java.util.HashMap;
+
+/**
+ * Tests pretty much copied from StopFilterFactoryTest. We use the test files
+ * used by the StopFilterFactoryTest. TODO: consider creating separate test files
+ * so this won't break if stop filter test files change.
+ */
+public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
+  /** Checks that common words load from one file, several files, and a snowball-format file. */
+  public void testInform() throws Exception {
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    assertNotNull("loader is null and it shouldn't be", loader);
+    CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("words", "stop-1.txt");
+    args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    CharArraySet words = factory.getCommonWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertEquals(2, words.size());
+    assertTrue(factory.isIgnoreCase() + " does not equal: " + true,
+        factory.isIgnoreCase());
+
+    // A comma-separated list of files: the word sets of both files are expected.
+    factory = new CommonGramsQueryFilterFactory();
+    args.put("words", "stop-1.txt, stop-2.txt");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    words = factory.getCommonWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertEquals(4, words.size());
+    assertTrue(factory.isIgnoreCase() + " does not equal: " + true,
+        factory.isIgnoreCase());
+
+    // Snowball format: several words per line, '|' starts a comment.
+    factory = new CommonGramsQueryFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    args.put("words", "stop-snowball.txt");
+    args.put("format", "snowball");
+    factory.init(args);
+    factory.inform(loader);
+    words = factory.getCommonWords();
+    assertEquals(8, words.size());
+    assertTrue(words.contains("he"));
+    assertTrue(words.contains("him"));
+    assertTrue(words.contains("his"));
+    assertTrue(words.contains("himself"));
+    assertTrue(words.contains("she"));
+    assertTrue(words.contains("her"));
+    assertTrue(words.contains("hers"));
+    assertTrue(words.contains("herself"));
+  }
+
+  /**
+   * If no words are provided, then a set of English default stopwords is used.
+   */
+  public void testDefaults() throws Exception {
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    assertNotNull("loader is null and it shouldn't be", loader);
+    CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    factory.init(args);
+    factory.inform(loader);
+    CharArraySet words = factory.getCommonWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertTrue(words.contains("the"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream,
+        new String[] { "testing_the", "the_factory" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/compoundDictionary.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/compoundDictionary.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/compoundDictionary.txt (working copy)
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the DictionaryCompound factory
+soft
+ball
+team
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/compoundDictionary.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/compoundDictionary.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/compoundDictionary.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/compoundDictionary.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/da_compoundDictionary.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/da_compoundDictionary.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/da_compoundDictionary.txt (working copy)
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the HyphenationCompound factory,
+# in conjunction with the Danish hyphenation grammar.
+læse
+hest
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/da_compoundDictionary.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/da_compoundDictionary.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/da_compoundDictionary.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/da_compoundDictionary.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java (working copy)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.compound;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+/**
+ * Simple tests to ensure the Dictionary compound filter factory is working.
+ */
+public class TestDictionaryCompoundWordTokenFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Ensure the filter actually decompounds text.
+   */
+  public void testDecompounding() throws Exception {
+    Reader reader = new StringReader("I like to play softball");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("dictionary", "compoundDictionary.txt");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    TokenStream stream = factory.create(tokenizer);
+    // Only "softball" is expected to gain subwords; it is kept and followed by its parts.
+    assertTokenStreamContents(stream,
+        new String[] { "I", "like", "to", "play", "softball", "soft", "ball" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestDictionaryCompoundWordTokenFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java (working copy)
@@ -0,0 +1,81 @@
+package org.apache.lucene.analysis.compound;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+/**
+ * Simple tests to ensure the Hyphenation compound filter factory is working.
+ */
+public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Ensure the factory works with hyphenation grammar+dictionary: using default options.
+   */
+  public void testHyphenationWithDictionary() throws Exception {
+    Reader reader = new StringReader("min veninde som er lidt af en læsehest");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("hyphenator", "da_UTF8.xml");
+    args.put("dictionary", "da_compoundDictionary.txt");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    TokenStream stream = factory.create(tokenizer);
+    // NOTE(review): the int[] presumably lists expected position increments (subwords at 0) - confirm
+    assertTokenStreamContents(stream,
+        new String[] { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" },
+        new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 }
+    );
+  }
+
+  /**
+   * Ensure the factory works with no dictionary: using hyphenation grammar only.
+   * Also change the min/max subword sizes from the default. When using no dictionary,
+   * it's generally necessary to tweak these, or you get lots of expansions.
+   */
+  public void testHyphenationOnly() throws Exception {
+    Reader reader = new StringReader("basketballkurv");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("hyphenator", "da_UTF8.xml");
+    args.put("minSubwordSize", "2");
+    args.put("maxSubwordSize", "4");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    TokenStream stream = factory.create(tokenizer);
+
+    assertTokenStreamContents(stream,
+        new String[] { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" }
+    );
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-1.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-1.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-1.txt (working copy)
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+foo
+bar
\ No newline at end of file
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-1.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-1.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-1.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-1.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-2.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-2.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-2.txt (working copy)
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+junk
+more
\ No newline at end of file
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-2.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-2.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-2.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-2.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-snowball.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-snowball.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-snowball.txt (working copy)
@@ -0,0 +1,10 @@
+ | This is a file in snowball format, empty lines are ignored, '|' is a comment
+ | Additionally, multiple words can be on the same line, allowing stopwords to be
+ | arranged in tables (useful in some languages where they might inflect)
+
+ | fictitious table below
+
+|third person singular
+|Subject Object Possessive Reflexive
+he him his himself| masculine
+she her hers herself| feminine
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-snowball.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-snowball.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-snowball.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stop-snowball.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-1.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-1.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-1.txt (working copy)
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-1.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-1.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-1.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-1.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-2.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-2.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-2.txt (working copy)
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-2.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-2.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-2.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/stoptypes-2.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java (working copy)
@@ -0,0 +1,176 @@
+package org.apache.lucene.analysis.core;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Modifier;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.EmptyTokenizer;
+import org.apache.lucene.analysis.MockCharFilter;
+import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
+import org.apache.lucene.analysis.MockGraphTokenFilter;
+import org.apache.lucene.analysis.MockHoleInjectingTokenFilter;
+import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ValidatingTokenFilter;
+import org.apache.lucene.analysis.core.TestRandomChains;
+import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
+import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests that any newly added Tokenizers/TokenFilters/CharFilters have a
+ * corresponding factory (and that the SPI configuration is correct)
+ */
+public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
+  // these are test-only components (e.g. test-framework)
+  private static final Set<Class<?>> testComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+  static {
+    Collections.<Class<?>>addAll(testComponents,
+      MockTokenizer.class,
+      MockCharFilter.class,
+      MockFixedLengthPayloadFilter.class,
+      MockGraphTokenFilter.class,
+      MockHoleInjectingTokenFilter.class,
+      MockRandomLookaheadTokenFilter.class,
+      MockTokenFilter.class,
+      MockVariableLengthPayloadFilter.class,
+      EmptyTokenizer.class,
+      ValidatingTokenFilter.class
+    );
+  }
+
+  // these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for these?
+  private static final Set<Class<?>> crazyComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+  static {
+    Collections.<Class<?>>addAll(crazyComponents,
+      CachingTokenFilter.class,
+      TeeSinkTokenFilter.class
+    );
+  }
+
+  // these are oddly-named (either the actual analyzer, or its factory)
+  // they do actually have factories.
+  // TODO: clean this up!
+  private static final Set<Class<?>> oddlyNamedComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+  static {
+    Collections.<Class<?>>addAll(oddlyNamedComponents,
+      ReversePathHierarchyTokenizer.class, // this is supported via an option to PathHierarchyTokenizer's factory
+      SnowballFilter.class // this is called SnowballPorterFilterFactory
+    );
+  }
+
+  /** For each concrete public analysis class: look up its factory by name and check what it creates. */
+  public void test() throws Exception {
+    List<Class<?>> analysisClasses = new ArrayList<Class<?>>();
+    TestRandomChains.getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
+    // skip everything that is excluded above or is not a Tokenizer/TokenFilter/CharFilter
+    for (final Class<?> c : analysisClasses) {
+      final int modifiers = c.getModifiers();
+      if (
+        // don't waste time with abstract classes
+        Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers)
+        || c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
+        || testComponents.contains(c)
+        || crazyComponents.contains(c)
+        || oddlyNamedComponents.contains(c)
+        || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
+      ) {
+        continue;
+      }
+
+      if (Tokenizer.class.isAssignableFrom(c)) {
+        String clazzName = c.getSimpleName();
+        assertTrue(clazzName.endsWith("Tokenizer"));
+        String simpleName = clazzName.substring(0, clazzName.length() - 9); // strip "Tokenizer"
+        TokenizerFactory instance = TokenizerFactory.forName(simpleName);
+        assertNotNull(instance);
+        try {
+          instance.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+          instance.init(Collections.<String,String>emptyMap());
+          // TODO: provide fake ResourceLoader
+          if (!(instance instanceof ResourceLoaderAware)) {
+            assertSame(c, instance.create(new StringReader("")).getClass());
+          }
+        } catch (InitializationException e) {
+          // TODO: For now pass because some factories have not yet a default config that always works, some require ResourceLoader
+        }
+      } else if (TokenFilter.class.isAssignableFrom(c)) {
+        String clazzName = c.getSimpleName();
+        assertTrue(clazzName.endsWith("Filter"));
+        String simpleName = clazzName.substring(0, clazzName.length() - (clazzName.endsWith("TokenFilter") ? 11 : 6)); // strip "TokenFilter" or "Filter"
+        TokenFilterFactory instance = TokenFilterFactory.forName(simpleName);
+        assertNotNull(instance);
+        try {
+          instance.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+          instance.init(Collections.<String,String>emptyMap());
+          // TODO: provide fake ResourceLoader
+          if (!(instance instanceof ResourceLoaderAware)) {
+            Class<? extends TokenStream> createdClazz = instance.create(new KeywordTokenizer(new StringReader(""))).getClass();
+            // only check instance if factory have wrapped at all!
+            if (KeywordTokenizer.class != createdClazz) {
+              assertSame(c, createdClazz);
+            }
+          }
+        } catch (InitializationException e) {
+          // TODO: For now pass because some factories have not yet a default config that always works, some require ResourceLoader
+        }
+      } else if (CharFilter.class.isAssignableFrom(c)) {
+        String clazzName = c.getSimpleName();
+        assertTrue(clazzName.endsWith("CharFilter"));
+        String simpleName = clazzName.substring(0, clazzName.length() - 10); // strip "CharFilter"
+        CharFilterFactory instance = CharFilterFactory.forName(simpleName);
+        assertNotNull(instance);
+        try {
+          instance.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+          instance.init(Collections.<String,String>emptyMap());
+          // TODO: provide fake ResourceLoader
+          if (!(instance instanceof ResourceLoaderAware)) {
+            Class<? extends Reader> createdClazz = instance.create(new StringReader("")).getClass();
+            // only check instance if factory have wrapped at all!
+            if (StringReader.class != createdClazz) {
+              assertSame(c, createdClazz);
+            }
+          }
+        } catch (InitializationException e) {
+          // TODO: For now pass because some factories have not yet a default config that always works, some require ResourceLoader
+        }
+      }
+    }
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (working copy)
@@ -0,0 +1,179 @@
+package org.apache.lucene.analysis.core;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Collections;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.StringMockResourceLoader;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+/**
+ * Sanity check some things about all factories,
+ * we do our best to see if we can sanely initialize it with
+ * no parameters and smoke test it, etc.
+ */
+// TODO: move this, TestRandomChains, and TestAllAnalyzersHaveFactories
+// to an integration test module that sucks in all analysis modules.
+// currently the only way to do this is via eclipse etc (LUCENE-3974)
+public class TestFactories extends BaseTokenStreamTestCase {
+  /** Runs the no-argument smoke test against every available tokenizer, token filter and char filter factory. */
+  public void test() throws IOException {
+    // each helper skips a factory that initialize() reports as not fully set up
+    for (String tokenizer : TokenizerFactory.availableTokenizers()) {
+      doTestTokenizer(tokenizer);
+    }
+    for (String tokenFilter : TokenFilterFactory.availableTokenFilters()) {
+      doTestTokenFilter(tokenFilter);
+    }
+    for (String charFilter : CharFilterFactory.availableCharFilters()) {
+      doTestCharFilter(charFilter);
+    }
+  }
+
+  /** Looks up the named tokenizer factory and, if it initializes without arguments, smoke tests it. */
+  private void doTestTokenizer(String tokenizer) throws IOException {
+    TokenizerFactory factory = TokenizerFactory.forName(tokenizer);
+    if (initialize(factory)) {
+      // we managed to fully create an instance. check a few more things:
+      // if it implements MultiTermAware, sanity check its impl
+      if (factory instanceof MultiTermAwareComponent) {
+        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
+        assertNotNull(mtc);
+        // it's not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it
+        assertFalse(mtc instanceof CharFilterFactory);
+      }
+
+      // beast it just a little, it shouldn't throw exceptions:
+      // (it should have thrown them in initialize)
+      checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
+    }
+  }
+
+  /** Looks up the named token filter factory and, if it initializes without arguments, smoke tests it. */
+  private void doTestTokenFilter(String tokenfilter) throws IOException {
+    TokenFilterFactory factory = TokenFilterFactory.forName(tokenfilter);
+    if (initialize(factory)) {
+      // we managed to fully create an instance. check a few more things:
+      // if it implements MultiTermAware, sanity check its impl
+      if (factory instanceof MultiTermAwareComponent) {
+        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
+        assertNotNull(mtc);
+        // it's not ok to return a charfilter or tokenizer here, this makes no sense
+        assertTrue(mtc instanceof TokenFilterFactory);
+      }
+
+      // beast it just a little, it shouldn't throw exceptions:
+      // (it should have thrown them in initialize)
+      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
+    }
+  }
+
+  /** Looks up the named char filter factory and, if it initializes without arguments, smoke tests it. */
+  private void doTestCharFilter(String charfilter) throws IOException {
+    CharFilterFactory factory = CharFilterFactory.forName(charfilter);
+    if (initialize(factory)) {
+      // we managed to fully create an instance. check a few more things:
+      // if it implements MultiTermAware, sanity check its impl
+      if (factory instanceof MultiTermAwareComponent) {
+        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
+        assertNotNull(mtc);
+        // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
+        assertTrue(mtc instanceof CharFilterFactory);
+      }
+
+      // beast it just a little, it shouldn't throw exceptions:
+      // (it should have thrown them in initialize)
+      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
+    }
+  }
+
+  /** tries to initialize a factory with no arguments; true only if init() and, when ResourceLoaderAware, inform() both succeed */
+  private boolean initialize(AbstractAnalysisFactory factory) {
+    boolean success = false;
+    try {
+      factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+      factory.init(Collections.<String,String>emptyMap());
+      success = true;
+    } catch (InitializationException ignored) {
+      // it's ok to throw this if we don't provide the right parameters
+    }
+    // only inform() a factory whose init() succeeded, so a failed init is never reported as success
+    if (success && factory instanceof ResourceLoaderAware) {
+      success = false;
+      try {
+        ((ResourceLoaderAware) factory).inform(new StringMockResourceLoader(""));
+        success = true;
+      } catch (InitializationException ignored) {
+        // it's ok to throw this if the right files aren't available or whatever
+      }
+    }
+    return success;
+  }
+
+  // some silly classes just so we can use checkRandomData
+  private final TokenizerFactory assertingTokenizer = new TokenizerFactory() {
+    @Override
+    public Tokenizer create(Reader input) {
+      return new MockTokenizer(input);
+    }
+  };
+
+  /** Analyzer built from a tokenizer factory plus an optional token filter factory and char filter factory. */
+  private static class FactoryAnalyzer extends Analyzer {
+    final TokenizerFactory tokenizer;
+    final CharFilterFactory charFilter;
+    final TokenFilterFactory tokenfilter;
+
+    /** tokenizer must not be null; tokenfilter and charFilter may be null to leave that stage out */
+    FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter) {
+      assert tokenizer != null;
+      this.tokenizer = tokenizer;
+      this.charFilter = charFilter;
+      this.tokenfilter = tokenfilter;
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer source = tokenizer.create(reader);
+      if (tokenfilter == null) {
+        // no filter factory supplied: the tokenizer alone is the whole chain
+        return new TokenStreamComponents(source);
+      }
+      // otherwise the filter created by the supplied factory wraps the tokenizer
+      return new TokenStreamComponents(source, tokenfilter.create(source));
+    }
+
+    @Override
+    protected Reader initReader(String fieldName, Reader reader) {
+      // route the raw reader through the char filter when one was supplied
+      return charFilter != null ? charFilter.create(reader) : reader;
+    }
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (revision 1365483)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (working copy)
@@ -235,7 +235,7 @@
private static Constructor castConstructor(Class instanceClazz, Constructor> ctor) {
return (Constructor) ctor;
}
- private static void getClassesForPackage(String pckgname, List> classes) throws Exception {
+ static void getClassesForPackage(String pckgname, List> classes) throws Exception {
final ClassLoader cld = TestRandomChains.class.getClassLoader();
final String path = pckgname.replace('.', '/');
final Enumeration resources = cld.getResources(path);
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java (working copy)
@@ -0,0 +1,76 @@
+package org.apache.lucene.analysis.core;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+import java.util.Map;
+import java.util.HashMap;
+
+/**
+ * Checks that {@link StopFilterFactory} loads its stop words from one file, from a
+ * comma-separated list of files, and from a file read with format "snowball".
+ **/
+public class TestStopFilterFactory extends BaseTokenStreamTestCase {
+  /** Runs init() and inform() for three argument sets and inspects the resulting stop word set. */
+  public void testInform() throws Exception {
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    assertNotNull("loader is null and it shouldn't be", loader);
+    StopFilterFactory factory = new StopFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("words", "stop-1.txt");
+    args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    CharArraySet words = factory.getStopWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertEquals("words Size: " + words.size() + " is not: " + 2, 2, words.size());
+    assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase());
+
+    factory = new StopFilterFactory();
+    args.put("words", "stop-1.txt, stop-2.txt");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    words = factory.getStopWords();
+    assertNotNull("words is null and it shouldn't be", words);
+    assertEquals("words Size: " + words.size() + " is not: " + 4, 4, words.size());
+    assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase());
+
+    factory = new StopFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    args.put("words", "stop-snowball.txt");
+    args.put("format", "snowball");
+    factory.init(args);
+    factory.inform(loader);
+    words = factory.getStopWords();
+    assertEquals(8, words.size());
+    assertTrue(words.contains("he"));
+    assertTrue(words.contains("him"));
+    assertTrue(words.contains("his"));
+    assertTrue(words.contains("himself"));
+    assertTrue(words.contains("she"));
+    assertTrue(words.contains("her"));
+    assertTrue(words.contains("hers"));
+    assertTrue(words.contains("herself"));
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java (working copy)
@@ -0,0 +1,105 @@
+package org.apache.lucene.analysis.core;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.NumericTokenStream;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Testcase for {@link TypeTokenFilterFactory}: argument parsing, filter creation and a missing 'types' argument.
+ */
+public class TestTypeTokenFilterFactory extends BaseTokenStreamTestCase {
+  @Test
+  public void testInform() throws Exception {
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("types", "stoptypes-1.txt");
+    args.put("enablePositionIncrements", "true");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    Set<String> types = factory.getStopTypes();
+    assertNotNull("types is null and it shouldn't be", types);
+    assertEquals("types Size: " + types.size() + " is not: " + 2, 2, types.size());
+    assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.isEnablePositionIncrements());
+
+    factory = new TypeTokenFilterFactory();
+    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
+    args.put("enablePositionIncrements", "false");
+    args.put("useWhitelist","true");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    factory.inform(loader);
+    types = factory.getStopTypes();
+    assertNotNull("types is null and it shouldn't be", types);
+    assertEquals("types Size: " + types.size() + " is not: " + 4, 4, types.size());
+    assertFalse("enablePositionIncrements was set to false but not correctly parsed", factory.isEnablePositionIncrements());
+  }
+
+  @Test
+  public void testCreationWithBlackList() throws Exception {
+    // NOTE(review): inform() is not called here or in testCreationWithWhiteList, so no type files are loaded - confirm intended
+    TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
+    args.put("enablePositionIncrements", "false");
+    typeTokenFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    typeTokenFilterFactory.init(args);
+    NumericTokenStream input = new NumericTokenStream();
+    input.setIntValue(123);
+    typeTokenFilterFactory.create(input);
+  }
+
+  @Test
+  public void testCreationWithWhiteList() throws Exception {
+    TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
+    args.put("enablePositionIncrements", "false");
+    args.put("useWhitelist","true");
+    typeTokenFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    typeTokenFilterFactory.init(args);
+    NumericTokenStream input = new NumericTokenStream();
+    input.setIntValue(123);
+    typeTokenFilterFactory.create(input);
+  }
+
+  @Test
+  public void testMissingTypesParameter() throws Exception {
+    try {
+      TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
+      Map<String,String> args = new HashMap<String,String>();
+      args.put("enablePositionIncrements", "false");
+      typeTokenFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+      typeTokenFilterFactory.init(args);
+      typeTokenFilterFactory.inform(new ResourceAsStreamResourceLoader(getClass()));
+      fail("not supplying 'types' parameter should cause an InitializationException");
+    } catch (InitializationException e) {
+      // everything ok
+    }
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemFilterFactory.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.cz;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Smoke test for {@link CzechStemFilterFactory}: a filter it creates must stem its input.
+ */
+public class TestCzechStemFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Pushes the text "angličtí" through a filter created by the factory and expects the token "anglick".
+   */
+  public void testStemming() throws Exception {
+    final String[] expectedStems = { "anglick" };
+    Reader text = new StringReader("angličtí");
+    Tokenizer source = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new CzechStemFilterFactory().create(source);
+    assertTokenStreamContents(stemmed, expectedStems);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Smoke test for {@link GermanLightStemFilterFactory}: the text "häuser" must come out as the token "haus".
+ */
+public class TestGermanLightStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader text = new StringReader("häuser");
+    MockTokenizer source = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new GermanLightStemFilterFactory().create(source);
+    assertTokenStreamContents(stemmed, new String[] { "haus" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Smoke test for {@link GermanMinimalStemFilterFactory}: the text "bilder" must come out as the token "bild".
+ */
+public class TestGermanMinimalStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    final String[] expected = { "bild" };
+    Reader input = new StringReader("bilder");
+    TokenStream output = new GermanMinimalStemFilterFactory().create(new MockTokenizer(input, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(output, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Smoke test for {@link GermanNormalizationFilterFactory}: the text "weißbier" must come out as "weissbier".
+ */
+public class TestGermanNormalizationFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader source = new StringReader("weißbier");
+    MockTokenizer words = new MockTokenizer(source, MockTokenizer.WHITESPACE, false);
+    TokenStream normalized = new GermanNormalizationFilterFactory().create(words);
+    assertTokenStreamContents(normalized, new String[] { "weissbier" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilterFactory.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Smoke test for {@link GermanStemFilterFactory}: a filter it creates must stem its input.
+ */
+public class TestGermanStemFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Pushes the text "Tischen" through a filter created by the factory and expects the token "tisch".
+   */
+  public void testStemming() throws Exception {
+    final String[] expectedStems = { "tisch" };
+    Reader text = new StringReader("Tischen");
+    Tokenizer source = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new GermanStemFilterFactory().create(source);
+    assertTokenStreamContents(stemmed, expectedStems);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekLowerCaseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekLowerCaseFilterFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.el;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Simple tests to ensure the Greek lowercase filter factory is working.
+ */
+public class TestGreekLowerCaseFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Mixed-case and all-caps spellings must both normalize to the same lowercase token.
+   */
+  public void testNormalization() throws Exception {
+    Reader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    GreekLowerCaseFilterFactory factory = new GreekLowerCaseFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    factory.init(args);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "μαιοσ", "μαιοσ" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekLowerCaseFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekLowerCaseFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekLowerCaseFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.el;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
+
+/**
+ * Sanity check that the Greek stem filter factory yields a working stemmer.
+ */
+public class TestGreekStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("άνθρωπος");
+    Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    // Lowercase-filter the tokens before they reach the stemmer under test.
+    TokenStream lowered = new GreekLowerCaseFilter(TEST_VERSION_CURRENT, source);
+    TokenStream stemmed = new GreekStemFilterFactory().create(lowered);
+    assertTokenStreamContents(stemmed, new String[] { "ανθρωπ" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.en;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Sanity check that the English minimal stem factory yields a working stemmer. */
+public class TestEnglishMinimalStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("bricks");
+    EnglishMinimalStemFilterFactory stemmerFactory = new EnglishMinimalStemFilterFactory();
+    MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemmerFactory.create(source);
+    String[] expected = { "brick" };
+    assertTokenStreamContents(stemmed, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.en;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Sanity check that the kstem filter factory yields a working stemmer. */
+public class TestKStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader text = new StringReader("bricks");
+    KStemFilterFactory kstem = new KStemFilterFactory();
+    // The factory-built filter consumes a MockTokenizer in WHITESPACE mode.
+    MockTokenizer words = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = kstem.create(words);
+    assertTokenStreamContents(stemmed, new String[] { "brick" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilterFactory.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.en;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Sanity check that the Porter stem filter factory yields a working stemmer.
+ */
+public class TestPorterStemFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * A plural English word must come out of the filter reduced to its stem.
+   */
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("dogs");
+    Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new PorterStemFilterFactory().create(source);
+    String[] expected = { "dog" };
+    assertTokenStreamContents(stemmed, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.es;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Sanity check that the Spanish Light stem factory yields a working stemmer. */
+public class TestSpanishLightStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("sociedades");
+    SpanishLightStemFilterFactory stemmerFactory = new SpanishLightStemFilterFactory();
+    MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemmerFactory.create(source);
+    String[] expected = { "sociedad" };
+    assertTokenStreamContents(stemmed, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.fa;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Sanity check that the Persian normalization factory yields a working filter.
+ */
+public class TestPersianNormalizationFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Persian input must come out of the factory-built filter in normalized form.
+   */
+  public void testNormalization() throws Exception {
+    Reader input = new StringReader("های");
+    Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream normalized = new PersianNormalizationFilterFactory().create(source);
+    String[] expected = { "هاي" };
+    assertTokenStreamContents(normalized, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.fi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Sanity check that the Finnish light stem factory yields a working stemmer. */
+public class TestFinnishLightStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader text = new StringReader("aseistettujen");
+    FinnishLightStemFilterFactory lightStem = new FinnishLightStemFilterFactory();
+    // The factory-built filter consumes a MockTokenizer in WHITESPACE mode.
+    MockTokenizer words = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = lightStem.create(words);
+    assertTokenStreamContents(stemmed, new String[] { "aseistet" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/frenchArticles.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/frenchArticles.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/frenchArticles.txt (working copy)
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of articles for testing the French Elision filter.
+# Requiring a text file is a bit weird here...
+l
+m
+t
+qu
+n
+s
+j
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/frenchArticles.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/frenchArticles.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/frenchArticles.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/frenchArticles.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElisionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElisionFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElisionFilterFactory.java (working copy)
@@ -0,0 +1,88 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+/**
+ * Simple tests to ensure the French elision filter factory is working.
+ */
+public class TestElisionFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Ensure an elided article listed in frenchArticles.txt is stripped from the token.
+   */
+  public void testElision() throws Exception {
+    Reader reader = new StringReader("l'avion");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    ElisionFilterFactory factory = new ElisionFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("articles", "frenchArticles.txt");
+    factory.init(args);
+    factory.inform(loader);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "avion" });
+  }
+
+  /**
+   * Test creating an elision filter without specifying any articles.
+   */
+  public void testDefaultArticles() throws Exception {
+    Reader reader = new StringReader("l'avion");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    ElisionFilterFactory factory = new ElisionFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    factory.init(args);
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    factory.inform(loader);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "avion" });
+  }
+
+  /**
+   * Test setting ignoreCase=true: an upper-case article must still be stripped.
+   */
+  public void testCaseInsensitive() throws Exception {
+    Reader reader = new StringReader("L'avion");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    ElisionFilterFactory factory = new ElisionFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("articles", "frenchArticles.txt");
+    args.put("ignoreCase", "true");
+    factory.init(args);
+    factory.inform(loader);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "avion" });
+  }
+
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElisionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElisionFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElisionFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElisionFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Sanity check that the French light stem factory yields a working stemmer. */
+public class TestFrenchLightStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("administrativement");
+    FrenchLightStemFilterFactory stemmerFactory = new FrenchLightStemFilterFactory();
+    MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemmerFactory.create(source);
+    String[] expected = { "administratif" };
+    assertTokenStreamContents(stemmed, expected);
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Smoke test confirming that {@link FrenchMinimalStemFilterFactory} hands back a working stem filter. */
+public class TestFrenchMinimalStemFilterFactory extends BaseTokenStreamTestCase {
+  /** Pushes the token "chevaux" through the factory-built filter and checks the emitted term. */
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("chevaux");
+    FrenchMinimalStemFilterFactory stemFactory = new FrenchMinimalStemFilterFactory();
+    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemFactory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "cheval" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.ga;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Smoke test confirming that {@link IrishLowerCaseFilterFactory} hands back a working lowercase filter. */
+public class TestIrishLowerCaseFilterFactory extends BaseTokenStreamTestCase {
+  /** Sends three mixed-case, whitespace-separated tokens through the filter and checks the emitted terms. */
+  public void testCasing() throws Exception {
+    Reader input = new StringReader("nAthair tUISCE hARD");
+    IrishLowerCaseFilterFactory lowerCaseFactory = new IrishLowerCaseFilterFactory();
+    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream lowered = lowerCaseFactory.create(tokenizer);
+    assertTokenStreamContents(lowered, new String[] { "n-athair", "t-uisce", "hard" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.gl;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Smoke test confirming that {@link GalicianMinimalStemFilterFactory} hands back a working stem filter. */
+public class TestGalicianMinimalStemFilterFactory extends BaseTokenStreamTestCase {
+  /** Pushes the token "elefantes" through the factory-built filter and checks the emitted term. */
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("elefantes");
+    GalicianMinimalStemFilterFactory stemFactory = new GalicianMinimalStemFilterFactory();
+    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemFactory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "elefante" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.gl;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Smoke test confirming that {@link GalicianStemFilterFactory} hands back a working stem filter. */
+public class TestGalicianStemFilterFactory extends BaseTokenStreamTestCase {
+  /** Pushes the token "cariñosa" through the factory-built filter and checks the emitted term. */
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("cariñosa");
+    GalicianStemFilterFactory stemFactory = new GalicianStemFilterFactory();
+    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemFactory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "cariñ" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java (working copy)
@@ -0,0 +1,91 @@
+package org.apache.lucene.analysis.hi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.in.IndicNormalizationFilterFactory;
+import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
+
+/**
+ * Exercises the Indic/Hindi filter factories on top of a StandardTokenizerFactory-built tokenizer.
+ */
+public class TestHindiFilters extends BaseTokenStreamTestCase {
+  /**
+   * Checks the terms emitted by a filter created through {@link IndicNormalizationFilterFactory}.
+   */
+  public void testIndicNormalizer() throws Exception {
+    Reader reader = new StringReader("ত্ अाैर");
+    StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
+    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    IndicNormalizationFilterFactory filterFactory = new IndicNormalizationFilterFactory();
+    filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    tokenizerFactory.init(args);
+    filterFactory.init(args);
+    Tokenizer tokenizer = tokenizerFactory.create(reader);
+    TokenStream stream = filterFactory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "ৎ", "और" });
+  }
+
+  /**
+   * Checks {@link HindiNormalizationFilterFactory} stacked on an Indic filter (whose factory is not init'ed here).
+   */
+  public void testHindiNormalizer() throws Exception {
+    Reader reader = new StringReader("क़िताब");
+    StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
+    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
+    HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
+    hindiFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    tokenizerFactory.init(args);
+    hindiFilterFactory.init(args);
+    Tokenizer tokenizer = tokenizerFactory.create(reader);
+    TokenStream stream = indicFilterFactory.create(tokenizer);
+    stream = hindiFilterFactory.create(stream);
+    assertTokenStreamContents(stream, new String[] {"किताब"});
+  }
+
+  /**
+   * Checks {@link HindiStemFilterFactory} at the end of the Indic and Hindi normalization chain.
+   */
+  public void testStemmer() throws Exception {
+    Reader reader = new StringReader("किताबें");
+    StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
+    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
+    HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
+    HindiStemFilterFactory stemFactory = new HindiStemFilterFactory();
+    stemFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    tokenizerFactory.init(args);
+    stemFactory.init(args);
+    Tokenizer tokenizer = tokenizerFactory.create(reader);
+    TokenStream stream = indicFilterFactory.create(tokenizer);
+    stream = hindiFilterFactory.create(stream);
+    stream = stemFactory.create(stream);
+    assertTokenStreamContents(stream, new String[] {"किताब"});
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.hu;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Smoke test confirming that {@link HungarianLightStemFilterFactory} hands back a working stem filter. */
+public class TestHungarianLightStemFilterFactory extends BaseTokenStreamTestCase {
+  /** Pushes the token "házakat" through the factory-built filter and checks the emitted term. */
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("házakat");
+    HungarianLightStemFilterFactory stemFactory = new HungarianLightStemFilterFactory();
+    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemFactory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "haz" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+
+/** Checks that a {@link HunspellStemFilterFactory} configured with test.dic/test.aff yields a stemming filter. */
+public class TestHunspellStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    // Parameterized map rather than a raw one, so the argument types are checked at compile time.
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("dictionary", "test.dic");
+    args.put("affix", "test.aff");
+    HunspellStemFilterFactory factory = new HunspellStemFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    factory.init(args);
+    // The loader is built from this test class; presumably the two files named above are resolved through it.
+    factory.inform(new ResourceAsStreamResourceLoader(getClass()));
+
+    Reader reader = new StringReader("abc");
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stream, new String[] { "ab" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java (working copy)
@@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.id;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Exercises {@link IndonesianStemFilterFactory} with default and with explicit arguments.
+ */
+public class TestIndonesianStemFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * With an empty argument map, "dibukukannya" is expected to come out as "buku".
+   */
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("dibukukannya");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    factory.init(args);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "buku" });
+  }
+
+  /**
+   * With stemDerivational set to "false", the same input is expected to come out as "dibukukan".
+   */
+  public void testStemmingInflectional() throws Exception {
+    Reader reader = new StringReader("dibukukannya");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("stemDerivational", "false");
+    factory.init(args);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "dibukukan" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.it;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Smoke test confirming that {@link ItalianLightStemFilterFactory} hands back a working stem filter. */
+public class TestItalianLightStemFilterFactory extends BaseTokenStreamTestCase {
+  /** Pushes "ragazzo" and "ragazzi" through the factory-built filter; both should yield the same term. */
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("ragazzo ragazzi");
+    ItalianLightStemFilterFactory stemFactory = new ItalianLightStemFilterFactory();
+    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemFactory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "ragazz", "ragazz" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.lv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Smoke test confirming that {@link LatvianStemFilterFactory} hands back a working stem filter. */
+public class TestLatvianStemFilterFactory extends BaseTokenStreamTestCase {
+  /** Pushes "tirgiem" and "tirgus" through the factory-built filter; both should yield the same term. */
+  public void testStemming() throws Exception {
+    Reader input = new StringReader("tirgiem tirgus");
+    LatvianStemFilterFactory stemFactory = new LatvianStemFilterFactory();
+    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = stemFactory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "tirg", "tirg" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-1.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-1.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-1.txt (working copy)
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+foo
+bar
\ No newline at end of file
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-1.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-1.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-1.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-1.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-2.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-2.txt (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-2.txt (working copy)
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+junk
+more
\ No newline at end of file
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-2.txt
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-2.txt (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-2.txt (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/keep-2.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java (working copy)
@@ -0,0 +1,223 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Simple tests to ensure the capitalization filter factory is working.
+ */
+public class TestCapitalizationFilterFactory extends BaseTokenStreamTestCase {
+
+ /** Walks one factory through keep words, forceFirstLetter, onlyFirstWord, minWordLength and okPrefix. */
+ public void testCapitalization() throws Exception {
+   Map<String,String> args = new HashMap<String,String>();
+   args.put( CapitalizationFilterFactory.KEEP, "and the it BIG" );
+   args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
+
+   CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init( args );
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "Kitten" });
+
+   factory.forceFirstLetter = true;
+
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("and"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "And" });
+
+   //first is forced, but it's not a keep word, either
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "And" });
+
+   factory.forceFirstLetter = false;
+
+   //first is not forced, but it's not a keep word, either
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "And" });
+
+   factory.forceFirstLetter = true;
+
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("big"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "Big" });
+
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("BIG"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "BIG" });
+
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.KEYWORD, false)),
+       new String[] { "Hello there my name is ryan" });
+
+   // now each token
+   factory.onlyFirstWord = false;
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
+
+   // now only the long words
+   factory.minWordLength = 3;
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
+
+   // without prefix
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "Mckinley" });
+
+   // Now try some prefixes (fresh factory, same args plus okPrefix)
+   factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   args.put( "okPrefix", "McK" ); // all words
+   factory.init( args );
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "McKinley" });
+
+   // now try some stuff with numbers
+   factory.forceFirstLetter = false;
+   factory.onlyFirstWord = false;
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("1st 2nd third"), MockTokenizer.WHITESPACE, false)),
+       new String[] { "1st", "2nd", "Third" });
+
+   factory.forceFirstLetter = true;
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("the The the"), MockTokenizer.KEYWORD, false)),
+       new String[] { "The The the" });
+ }
+
+ /** Test the keepIgnoreCase option: keep word "kitten" is checked against the token "kiTTEN". */
+ public void testKeepIgnoreCase() throws Exception {
+   Map<String,String> args = new HashMap<String,String>();
+   args.put( CapitalizationFilterFactory.KEEP, "kitten" );
+   args.put( CapitalizationFilterFactory.KEEP_IGNORE_CASE, "true" );
+   args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
+   CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init( args );
+   factory.forceFirstLetter = true;
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
+       new String[] { "KiTTEN" });
+
+   factory.forceFirstLetter = false;
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
+       new String[] { "kiTTEN" });
+
+   factory.keep = null; // without a keep set the token is capitalized normally
+   assertTokenStreamContents(factory.create(
+       new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
+       new String[] { "Kitten" });
+ }
+
+ /**
+  * Test CapitalizationFilterFactory's minWordLength option.
+  *
+  * Note: the result is surprising when combined with ONLY_FIRST_WORD.
+  */
+ public void testMinWordLength() throws Exception {
+   Map<String,String> args = new HashMap<String,String>();
+   args.put(CapitalizationFilterFactory.ONLY_FIRST_WORD, "true");
+   args.put(CapitalizationFilterFactory.MIN_WORD_LENGTH, "5");
+   CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   Tokenizer tokenizer = new MockTokenizer(new StringReader(
+       "helo testing"), MockTokenizer.WHITESPACE, false);
+   TokenStream ts = factory.create(tokenizer);
+   assertTokenStreamContents(ts, new String[] {"helo", "Testing"});
+ }
+
+ /**
+  * Test CapitalizationFilterFactory's maxWordCount option when every token
+  * holds a single word (the option should have no effect).
+  */
+ public void testMaxWordCount() throws Exception {
+   Map<String,String> args = new HashMap<String,String>();
+   args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
+   CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   Tokenizer tokenizer = new MockTokenizer(new StringReader(
+       "one two three four"), MockTokenizer.WHITESPACE, false);
+   TokenStream ts = factory.create(tokenizer);
+   assertTokenStreamContents(ts, new String[] {"One", "Two", "Three", "Four"});
+ }
+
+ /**
+  * Test CapitalizationFilterFactory's maxWordCount option when exceeded
+  */
+ public void testMaxWordCount2() throws Exception {
+   Map<String,String> args = new HashMap<String,String>();
+   args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
+   CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   Tokenizer tokenizer = new MockTokenizer(new StringReader(
+       "one two three four"), MockTokenizer.KEYWORD, false); // one four-word token
+   TokenStream ts = factory.create(tokenizer);
+   assertTokenStreamContents(ts, new String[] {"one two three four"});
+ }
+
+ /**
+  * Test CapitalizationFilterFactory's maxTokenLength option when exceeded
+  *
+  * Note: not a true maximum; a token whose length equals the limit ('is') is already left as is.
+  */
+ public void testMaxTokenLength() throws Exception {
+   Map<String,String> args = new HashMap<String,String>();
+   args.put(CapitalizationFilterFactory.MAX_TOKEN_LENGTH, "2");
+   CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   Tokenizer tokenizer = new MockTokenizer(new StringReader(
+       "this is a test"), MockTokenizer.WHITESPACE, false);
+   TokenStream ts = factory.create(tokenizer);
+   assertTokenStreamContents(ts, new String[] {"this", "is", "A", "test"});
+ }
+
+ /**
+  * Test CapitalizationFilterFactory's forceFirstLetter option
+  */
+ public void testForceFirstLetter() throws Exception {
+   Map<String,String> args = new HashMap<String,String>();
+   args.put(CapitalizationFilterFactory.KEEP, "kitten");
+   args.put(CapitalizationFilterFactory.FORCE_FIRST_LETTER, "true");
+   CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   Tokenizer tokenizer = new MockTokenizer(new StringReader("kitten"), MockTokenizer.WHITESPACE, false);
+   TokenStream ts = factory.create(tokenizer);
+   assertTokenStreamContents(ts, new String[] {"Kitten"}); // keep word, yet first letter is forced
+ }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java (working copy)
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+import java.util.Map;
+import java.util.HashMap;
+
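+/**
+ * Simple tests to ensure {@link KeepWordFilterFactory} loads its word lists
+ * from one or several resource files.
+ */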
+public class TestKeepFilterFactory extends BaseTokenStreamTestCase {
+
+ /** Loads keep words from one resource file, then from a comma-separated list of two files. */
+ public void testInform() throws Exception {
+   ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+   assertNotNull("loader is null and it shouldn't be", loader);
+
+   // single word file: keep-1.txt holds "foo" and "bar"
+   KeepWordFilterFactory factory = new KeepWordFilterFactory();
+   Map<String,String> args = new HashMap<String,String>();
+   args.put("words", "keep-1.txt");
+   args.put("ignoreCase", "true");
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   factory.inform(loader);
+   CharArraySet words = factory.getWords();
+   assertNotNull("words is null and it shouldn't be", words);
+   assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
+
+   // two word files: keep-2.txt adds "junk" and "more", so four words are expected
+   factory = new KeepWordFilterFactory();
+   args.put("words", "keep-1.txt, keep-2.txt");
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   factory.inform(loader);
+   words = factory.getWords();
+   assertNotNull("words is null and it shouldn't be", words);
+   assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
+ }
+}
\ No newline at end of file
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (working copy)
@@ -0,0 +1,68 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.StringMockResourceLoader;
+
+/**
+ * Simple tests to ensure the keyword marker filter factory is working.
+ */
+public class TestKeywordMarkerFilterFactory extends BaseTokenStreamTestCase {
+ /** The word listed in the "protected" resource ("cats") must come out of the stemmer unchanged. */
+ public void testKeywords() throws IOException {
+   Reader reader = new StringReader("dogs cats");
+   Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+   KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
+   Map<String,String> args = new HashMap<String,String>();
+   ResourceLoader loader = new StringMockResourceLoader("cats");
+   args.put("protected", "protwords.txt");
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   factory.inform(loader);
+   TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
+   assertTokenStreamContents(ts, new String[] { "dog", "cats" });
+ }
+
+ /** With ignoreCase=true the protected word "cats" must also keep "Cats" from being stemmed. */
+ public void testKeywordsCaseInsensitive() throws IOException {
+   Reader reader = new StringReader("dogs cats Cats");
+   Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+   KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
+   Map<String,String> args = new HashMap<String,String>();
+   ResourceLoader loader = new StringMockResourceLoader("cats");
+   args.put("protected", "protwords.txt");
+   args.put("ignoreCase", "true");
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   factory.inform(loader);
+   TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
+   assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" });
+ }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (working copy)
@@ -0,0 +1,50 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+public class TestLengthFilterFactory extends BaseTokenStreamTestCase {
+
+ /** With min=4 and max=10 only "foobar" is expected; its position increment depends on the flag. */
+ public void test() throws IOException {
+   LengthFilterFactory factory = new LengthFilterFactory();
+   Map<String,String> args = new HashMap<String,String>();
+   args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
+   args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
+   // default: args.put("enablePositionIncrements", "false");
+   factory.init(args);
+   String test = "foo foobar super-duper-trooper";
+   TokenStream stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
+   factory = new LengthFilterFactory();
+   args = new HashMap<String,String>();
+   args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
+   args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
+   args.put("enablePositionIncrements", "true");
+   factory.init(args);
+   stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
+ }
+}
\ No newline at end of file
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java (working copy)
@@ -0,0 +1,80 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+import java.util.Iterator;
+import java.util.Arrays;
+
+/** Simple tests to ensure this factory is working */
+public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenStreamTestCase {
+
+ /** Creates a token for term {@code t} with the given offsets and position increment {@code pos}. */
+ public static Token tok(int pos, String t, int start, int end) {
+   final Token token = new Token(t, start, end);
+   token.setPositionIncrement(pos);
+   return token;
+ }
+ /** Same as the four-argument overload, with both offsets set to 0. */
+ public static Token tok(int pos, String t) { return tok(pos, t, 0, 0); }
+
+ /**
+  * Feeds {@code tokens} through a stream created by RemoveDuplicatesTokenFilterFactory and
+  * asserts that the resulting terms equal the whitespace-separated terms of {@code expected}.
+  */
+ public void testDups(final String expected, final Token... tokens) throws Exception {
+   final Iterator<Token> toks = Arrays.asList(tokens).iterator();
+   RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
+   final TokenStream ts = factory.create(new TokenStream() {
+     CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+     OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+     PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+     @Override
+     public boolean incrementToken() {
+       if (!toks.hasNext()) {
+         return false;
+       }
+       clearAttributes(); // then replay the next canned token: term, offsets, position increment
+       Token tok = toks.next();
+       termAtt.setEmpty().append(tok);
+       offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
+       posIncAtt.setPositionIncrement(tok.getPositionIncrement());
+       return true;
+     }
+   });
+
+   assertTokenStreamContents(ts, expected.split("\\s"));
+ }
+
+ /** The second "B" (position increment 0) repeats the first one and is expected to be dropped. */
+ public void testSimpleDups() throws Exception {
+   testDups("A B C D E",
+       tok(1, "A", 0, 4),
+       tok(1, "B", 5, 10),
+       tok(0, "B", 11, 15),
+       tok(1, "C", 16, 20),
+       tok(0, "D", 16, 20),
+       tok(1, "E", 21, 25));
+ }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java (working copy)
@@ -0,0 +1,69 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.StringMockResourceLoader;
+
+/**
+ * Simple tests to ensure the stemmer override filter factory is working.
+ */
+public class TestStemmerOverrideFilterFactory extends BaseTokenStreamTestCase {
+ /** Our stemdict maps "dogs" to "cat"; "testing" has no entry and is stemmed by PorterStemFilter. */
+ public void testKeywords() throws IOException {
+   Reader reader = new StringReader("testing dogs");
+   Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+   StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
+   Map<String,String> args = new HashMap<String,String>();
+   ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
+   args.put("dictionary", "stemdict.txt");
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   factory.inform(loader);
+
+   TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
+   assertTokenStreamContents(ts, new String[] { "test", "cat" });
+ }
+
+ /** With ignoreCase=true the dictionary entry for "dogs" must also apply to the token "DoGs". */
+ public void testKeywordsCaseInsensitive() throws IOException {
+   Reader reader = new StringReader("testing DoGs");
+   Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+   StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
+   Map<String,String> args = new HashMap<String,String>();
+   ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
+   args.put("dictionary", "stemdict.txt");
+   args.put("ignoreCase", "true");
+   factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+   factory.init(args);
+   factory.inform(loader);
+   TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
+   assertTokenStreamContents(ts, new String[] { "test", "cat" });
+ }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Checks that a filter built by {@link TrimFilterFactory} strips the trailing whitespace of a keyword token.
+ */
+public class TestTrimFilterFactory extends BaseTokenStreamTestCase {
+  public void testTrimming() throws Exception {
+    TrimFilterFactory factory = new TrimFilterFactory();
+    Map<String,String> args = new HashMap<String,String>(); // typed map: no raw-type warnings
+    args.put("updateOffsets", "false");
+    factory.init(args);
+    TokenStream ts = factory.create(new MockTokenizer(new StringReader("trim me "), MockTokenizer.KEYWORD, false));
+    assertTokenStreamContents(ts, new String[] { "trim me" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java (working copy)
@@ -0,0 +1,164 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Simple tests to ensure the NGram and EdgeNGram tokenizer and filter factories are working.
+ */
+public class TestNGramFilters extends BaseTokenStreamTestCase {
+  /**
+   * Test NGramTokenizerFactory with no options: "test" must yield its 1-grams, then its 2-grams.
+   */
+  public void testNGramTokenizer() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    NGramTokenizerFactory factory = new NGramTokenizerFactory();
+    factory.init(args);
+    Tokenizer stream = factory.create(reader);
+    assertTokenStreamContents(stream,
+        new String[] { "t", "e", "s", "t", "te", "es", "st" });
+  }
+  /**
+   * Test NGramTokenizerFactory with min and max gram options (2 and 3): only 2- and 3-grams come out.
+   */
+  public void testNGramTokenizer2() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("minGramSize", "2");
+    args.put("maxGramSize", "3");
+    NGramTokenizerFactory factory = new NGramTokenizerFactory();
+    factory.init(args);
+    Tokenizer stream = factory.create(reader);
+    assertTokenStreamContents(stream,
+        new String[] { "te", "es", "st", "tes", "est" });
+  }
+  /**
+   * Test the NGramFilterFactory with no options: same grams as the tokenizer variant.
+   */
+  public void testNGramFilter() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    NGramFilterFactory factory = new NGramFilterFactory();
+    factory.init(args);
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stream,
+        new String[] { "t", "e", "s", "t", "te", "es", "st" });
+  }
+  /**
+   * Test the NGramFilterFactory with min and max gram options (2 and 3)
+   */
+  public void testNGramFilter2() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("minGramSize", "2");
+    args.put("maxGramSize", "3");
+    NGramFilterFactory factory = new NGramFilterFactory();
+    factory.init(args);
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stream,
+        new String[] { "te", "es", "st", "tes", "est" });
+  }
+  /**
+   * Test EdgeNGramTokenizerFactory with no options: only the leading 1-gram is expected.
+   */
+  public void testEdgeNGramTokenizer() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    EdgeNGramTokenizerFactory factory = new EdgeNGramTokenizerFactory();
+    factory.init(args);
+    Tokenizer stream = factory.create(reader);
+    assertTokenStreamContents(stream,
+        new String[] { "t" });
+  }
+  /**
+   * Test EdgeNGramTokenizerFactory with min and max gram size (1 and 2): leading 1- and 2-gram.
+   */
+  public void testEdgeNGramTokenizer2() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("minGramSize", "1");
+    args.put("maxGramSize", "2");
+    EdgeNGramTokenizerFactory factory = new EdgeNGramTokenizerFactory();
+    factory.init(args);
+    Tokenizer stream = factory.create(reader);
+    assertTokenStreamContents(stream,
+        new String[] { "t", "te" });
+  }
+  /**
+   * Test EdgeNGramTokenizerFactory with side option "back": the gram is taken from the end of the input.
+   */
+  public void testEdgeNGramTokenizer3() throws Exception {
+    Reader reader = new StringReader("ready");
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("side", "back");
+    EdgeNGramTokenizerFactory factory = new EdgeNGramTokenizerFactory();
+    factory.init(args);
+    Tokenizer stream = factory.create(reader);
+    assertTokenStreamContents(stream,
+        new String[] { "y" });
+  }
+  /**
+   * Test EdgeNGramFilterFactory with no options: only the leading 1-gram is expected.
+   */
+  public void testEdgeNGramFilter() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
+    factory.init(args);
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stream,
+        new String[] { "t" });
+  }
+  /**
+   * Test EdgeNGramFilterFactory with min and max gram size (1 and 2): leading 1- and 2-gram.
+   */
+  public void testEdgeNGramFilter2() throws Exception {
+    Reader reader = new StringReader("test");
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("minGramSize", "1");
+    args.put("maxGramSize", "2");
+    EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
+    factory.init(args);
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stream,
+        new String[] { "t", "te" });
+  }
+  /**
+   * Test EdgeNGramFilterFactory with side option "back": the gram is taken from the end of the token.
+   */
+  public void testEdgeNGramFilter3() throws Exception {
+    Reader reader = new StringReader("ready");
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("side", "back");
+    EdgeNGramFilterFactory factory = new EdgeNGramFilterFactory();
+    factory.init(args);
+    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stream,
+        new String[] { "y" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.no;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Smoke test: a filter created by NorwegianLightStemFilterFactory reduces "epler" and "eple" to "epl".
+ */
+public class TestNorwegianLightStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    NorwegianLightStemFilterFactory stemFactory = new NorwegianLightStemFilterFactory();
+    Reader input = new StringReader("epler eple");
+    TokenStream stemmed = stemFactory.create(new MockTokenizer(input, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stemmed, new String[] { "epl", "epl" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.no;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Smoke test: NorwegianMinimalStemFilterFactory output maps six inflections of "eple" to "epl".
+ */
+public class TestNorwegianMinimalStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    NorwegianMinimalStemFilterFactory stemFactory = new NorwegianMinimalStemFilterFactory();
+    Reader input = new StringReader("eple eplet epler eplene eplets eplenes");
+    TokenStream stemmed = stemFactory.create(new MockTokenizer(input, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stemmed, new String[] { "epl", "epl", "epl", "epl", "epl", "epl" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java (working copy)
@@ -0,0 +1,86 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.*;
+
+/**
+ * Simple tests to ensure PatternReplaceCharFilterFactory builds a working char filter (terms and offsets).
+ */
+public class TestPatternReplaceCharFilterFactory extends BaseTokenStreamTestCase {
+
+  //                     1111
+  // offsets:  01234567890123
+  // input:    this is test.   (pattern does not match, so text and offsets pass through unchanged)
+  public void testNothingChange() throws IOException {
+    final String BLOCK = "this is test.";
+    PatternReplaceCharFilterFactory factory = new PatternReplaceCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("pattern", "(aa)\\s+(bb)\\s+(cc)");
+    args.put("replacement", "$1$2$3");
+    factory.init(args);
+    CharFilter cs = factory.create(
+        new StringReader( BLOCK ) );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts,
+        new String[] { "this", "is", "test." },
+        new int[] { 0, 5, 8 },
+        new int[] { 4, 7, 13 });
+  }
+
+  // offsets:  012345678
+  // input:    aa bb cc   (no "replacement" argument is configured)
+  public void testReplaceByEmpty() throws IOException {
+    final String BLOCK = "aa bb cc";
+    PatternReplaceCharFilterFactory factory = new PatternReplaceCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("pattern", "(aa)\\s+(bb)\\s+(cc)");
+    factory.init(args);
+    CharFilter cs = factory.create(
+        new StringReader( BLOCK ) );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    ts.reset();
+    assertFalse(ts.incrementToken()); // the whole input matches and is expected to vanish: no tokens
+    ts.end();
+    ts.close();
+  }
+
+  // offsets:  012345678
+  // input:    aa bb cc
+  // output:   aa#bb#cc
+  public void test1block1matchSameLength() throws IOException {
+    final String BLOCK = "aa bb cc";
+    PatternReplaceCharFilterFactory factory = new PatternReplaceCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("pattern", "(aa)\\s+(bb)\\s+(cc)");
+    args.put("replacement", "$1#$2#$3");
+    factory.init(args);
+    CharFilter cs = factory.create(
+        new StringReader( BLOCK ) );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts,
+        new String[] { "aa#bb#cc" },
+        new int[] { 0 },
+        new int[] { 8 });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Date Author Id Revision HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilterFactory.java (working copy)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Simple test to ensure PatternReplaceFilterFactory replaces every match of the pattern inside each token.
+ */
+public class TestPatternReplaceFilterFactory extends BaseTokenStreamTestCase {
+
+  public void testReplaceAll() throws Exception {
+    String input = "aabfooaabfooabfoob ab caaaaaaaaab";
+    PatternReplaceFilterFactory factory = new PatternReplaceFilterFactory();
+    Map<String,String> args = new HashMap<String,String>(); // typed map: no raw-type warnings
+    args.put("pattern", "a*b");
+    args.put("replacement", "-");
+    factory.init(args);
+    TokenStream ts = factory.create
+        (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
+    // Each run matching "a*b" collapses to a single "-" in every whitespace-separated token.
+    assertTokenStreamContents(ts,
+        new String[] { "-foo-foo-foo-", "-", "c-" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+
+/** Simple test to ensure PatternTokenizerFactory splits input on the configured delimiter pattern. */
+public class TestPatternTokenizerFactory extends BaseTokenStreamTestCase {
+  public void testFactory() throws Exception {
+    final String INPUT = "Günther Günther is here";
+
+    // create PatternTokenizer; the pattern matches the separators (commas, semicolons, slashes, whitespace)
+    Map<String,String> args = new HashMap<String,String>();
+    args.put( PatternTokenizerFactory.PATTERN, "[,;/\\s]+" );
+    PatternTokenizerFactory tokFactory = new PatternTokenizerFactory();
+    tokFactory.init( args );
+    TokenStream stream = tokFactory.create( new StringReader(INPUT) );
+    assertTokenStreamContents(stream,
+        new String[] { "Günther", "Günther", "is", "here" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilterFactory.java (working copy)
@@ -0,0 +1,80 @@
+package org.apache.lucene.analysis.payloads;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
+import org.apache.lucene.analysis.payloads.FloatEncoder;
+import org.apache.lucene.analysis.payloads.PayloadHelper;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.StringMockResourceLoader;
+
+public class TestDelimitedPayloadTokenFilterFactory extends BaseTokenStreamTestCase {
+
+  /** The "float" encoder shorthand, with no delimiter configured ('|' in the input), must give 0.1f payloads. */
+  public void testEncoder() throws Exception {
+    Map<String,String> args = new HashMap<String,String>();
+    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
+    assertFloatPayloads(args, "the|0.1 quick|0.1 red|0.1", 3);
+  }
+
+  /** An encoder given by class name together with a custom '*' delimiter must give 0.1f payloads. */
+  public void testDelim() throws Exception {
+    Map<String,String> args = new HashMap<String,String>();
+    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
+    args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
+    assertFloatPayloads(args, "the*0.1 quick*0.1 red*0.1", 3);
+  }
+
+  /**
+   * Builds the factory from args, filters text, and checks each of the expectedTokens tokens has a 0.1f payload.
+   */
+  private void assertFloatPayloads(Map<String,String> args, String text, int expectedTokens) throws Exception {
+    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
+    factory.init(args);
+    ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
+    factory.inform(loader);
+    TokenStream input = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
+    DelimitedPayloadTokenFilter tf = factory.create(input);
+    tf.reset();
+    int tokens = 0;
+    while (tf.incrementToken()) {
+      PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
+      assertNotNull("payAttr is null and it shouldn't be", payAttr);
+      // Check the payload itself before touching its bytes, so a missing payload fails with a clear message.
+      assertNotNull("payload is null and it shouldn't be", payAttr.getPayload());
+      byte[] payData = payAttr.getPayload().bytes;
+      assertNotNull("payData is null and it shouldn't be", payData);
+      float payFloat = PayloadHelper.decodeFloat(payData);
+      assertEquals(payFloat + " does not equal: " + 0.1f, 0.1f, payFloat, 0.0f);
+      tokens++;
+    }
+    // Guard against the loop passing vacuously when the stream produces no tokens at all.
+    assertEquals("unexpected token count", expectedTokens, tokens);
+    tf.end();
+    tf.close();
+  }
+}
+
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.pt;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Smoke test: a filter created by PortugueseLightStemFilterFactory stems "evidentemente" to "evident".
+ */
+public class TestPortugueseLightStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    PortugueseLightStemFilterFactory stemFactory = new PortugueseLightStemFilterFactory();
+    Reader input = new StringReader("evidentemente");
+    TokenStream stemmed = stemFactory.create(new MockTokenizer(input, MockTokenizer.WHITESPACE, false));
+    assertTokenStreamContents(stemmed, new String[] { "evident" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.pt;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Checks that a filter built by {@link PortugueseMinimalStemFilterFactory} stems Portuguese text.
+ */
+public class TestPortugueseMinimalStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader text = new StringReader("questões");
+    MockTokenizer tokenizer = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new PortugueseMinimalStemFilterFactory().create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "questão" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.pt;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Checks that a filter built by {@link PortugueseStemFilterFactory} stems Portuguese text.
+ */
+public class TestPortugueseStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader text = new StringReader("maluquice");
+    MockTokenizer tokenizer = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new PortugueseStemFilterFactory().create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "maluc" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilterFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.reverse;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Simple tests to ensure the Reverse string filter factory is working.
+ */
+public class TestReverseStringFilterFactory extends BaseTokenStreamTestCase {
+  /**
+   * Ensure each whitespace-separated token comes out with its characters reversed.
+   */
+  public void testReversing() throws Exception {
+    Reader reader = new StringReader("simple test");
+    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    ReverseStringFilterFactory factory = new ReverseStringFilterFactory();
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+    Map<String,String> args = Collections.emptyMap();
+    factory.init(args);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, new String[] { "elpmis", "tset" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.ru;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Checks that a filter built by {@link RussianLightStemFilterFactory} stems Russian text.
+ */
+public class TestRussianLightStemFilterFactory extends BaseTokenStreamTestCase {
+  public void testStemming() throws Exception {
+    Reader text = new StringReader("журналы");
+    MockTokenizer tokenizer = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    TokenStream stemmed = new RussianLightStemFilterFactory().create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "журнал" });
+  }
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilterFactory.java (revision 1365496)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleFilterFactory.java (working copy)
@@ -0,0 +1,239 @@
+package org.apache.lucene.analysis.shingle;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Simple tests to ensure the Shingle filter factory works.
+ */
+public class TestShingleFilterFactory extends BaseTokenStreamTestCase {
+ /**
+  * Test the defaults: with no arguments, unigrams and two-word shingles are expected.
+  */
+ public void testDefaults() throws Exception {
+   Reader reader = new StringReader("this is a test");
+   Map<String,String> args = new HashMap<String,String>();
+   ShingleFilterFactory factory = new ShingleFilterFactory();
+   factory.init(args);
+   TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream, new String[] {"this", "this is", "is",
+       "is a", "a", "a test", "test"});
+ }
+
+ /**
+  * Test with unigrams disabled: only the two-word shingles are expected.
+  */
+ public void testNoUnigrams() throws Exception {
+   Reader reader = new StringReader("this is a test");
+   Map<String,String> args = new HashMap<String,String>();
+   args.put("outputUnigrams", "false");
+   ShingleFilterFactory factory = new ShingleFilterFactory();
+   factory.init(args);
+   TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream,
+       new String[] {"this is", "is a", "a test"});
+ }
+
+ /**
+  * Test with a higher max shingle size (3): shingles of two and three words are expected.
+  */
+ public void testMaxShingleSize() throws Exception {
+   Reader reader = new StringReader("this is a test");
+   Map<String,String> args = new HashMap<String,String>();
+   args.put("maxShingleSize", "3");
+   ShingleFilterFactory factory = new ShingleFilterFactory();
+   factory.init(args);
+   TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream,
+       new String[] {"this", "this is", "this is a", "is",
+           "is a", "is a test", "a", "a test", "test"});
+ }
+
+ /**
+  * Test with higher min (3) and max (4) shingle size; unigrams are still expected.
+  */
+ public void testMinShingleSize() throws Exception {
+   Reader reader = new StringReader("this is a test");
+   Map<String,String> args = new HashMap<String,String>();
+   args.put("minShingleSize", "3");
+   args.put("maxShingleSize", "4");
+   ShingleFilterFactory factory = new ShingleFilterFactory();
+   factory.init(args);
+   TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream,
+       new String[] { "this", "this is a", "this is a test",
+           "is", "is a test", "a", "test" });
+ }
+
+ /**
+  * Test with higher min (3) and max (4) shingle size and with unigrams disabled
+  */
+ public void testMinShingleSizeNoUnigrams() throws Exception {
+   Reader reader = new StringReader("this is a test");
+   Map<String,String> args = new HashMap<String,String>();
+   args.put("minShingleSize", "3");
+   args.put("maxShingleSize", "4");
+   args.put("outputUnigrams", "false");
+   ShingleFilterFactory factory = new ShingleFilterFactory();
+   factory.init(args);
+   TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream,
+       new String[] { "this is a", "this is a test", "is a test" });
+ }
+
+ /**
+  * Test with the same higher min and max shingle size (3); unigrams are still expected.
+  */
+ public void testEqualMinAndMaxShingleSize() throws Exception {
+   Reader reader = new StringReader("this is a test");
+   Map<String,String> args = new HashMap<String,String>();
+   args.put("minShingleSize", "3");
+   args.put("maxShingleSize", "3");
+   ShingleFilterFactory factory = new ShingleFilterFactory();
+   factory.init(args);
+   TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream,
+       new String[] { "this", "this is a", "is", "is a test", "a", "test" });
+ }
+
+ /**
+  * Test with the same higher min and max shingle size (3) and with unigrams disabled
+  */
+ public void testEqualMinAndMaxShingleSizeNoUnigrams() throws Exception {
+   Reader reader = new StringReader("this is a test");
+   Map<String,String> args = new HashMap<String,String>();
+   args.put("minShingleSize", "3");
+   args.put("maxShingleSize", "3");
+   args.put("outputUnigrams", "false");
+   ShingleFilterFactory factory = new ShingleFilterFactory();
+   factory.init(args);
+   TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+   assertTokenStreamContents(stream,
+       new String[] { "this is a", "is a test" });
+ }
+
+ /**
+ * Test with a non-default token separator
+ */
+ public void testTokenSeparator() throws Exception {
+ Reader reader = new StringReader("this is a test");
+ Map args = new HashMap