Index: .
===================================================================
--- . (revision 1365638)
+++ . (working copy)
Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
Merged /lucene/dev/trunk:r1365586,1365602,1365610
Merged /lucene/dev/branches/lucene2510:r1364862-1365496
Index: dev-tools
===================================================================
--- dev-tools (revision 1365638)
+++ dev-tools (working copy)
Property changes on: dev-tools
___________________________________________________________________
Modified: svn:mergeinfo
Merged /lucene/dev/trunk/dev-tools:r1365586,1365602,1365610
Merged /lucene/dev/branches/lucene2510/dev-tools:r1364862-1365496
Index: dev-tools/eclipse/dot.classpath
===================================================================
--- dev-tools/eclipse/dot.classpath (revision 1365638)
+++ dev-tools/eclipse/dot.classpath (working copy)
@@ -17,25 +17,28 @@
+ * This factory accepts the following parameters:
+ *
+ *
+ * group=-1 (the default) is equivalent to "split". In this case, the tokens will
+ * be equivalent to the output from (without empty tokens):
+ * {@link String#split(java.lang.String)}
+ *
+ * Using group >= 0 selects the matching group as the token. For example, if you have:
+ * <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.ArabicNormalizationFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class ArabicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+
+  /**
+   * Wraps the given stream in a new {@link ArabicNormalizationFilter}.
+   *
+   * @param input the token stream to filter
+   * @return an {@link ArabicNormalizationFilter} reading from {@code input}
+   */
+  @Override
+  public ArabicNormalizationFilter create(TokenStream input) {
+    return new ArabicNormalizationFilter(input);
+  }
+
+  /**
+   * Returns this same factory instance as the multi-term component.
+   */
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java (working copy)
@@ -0,0 +1,43 @@
+package org.apache.lucene.analysis.ar;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ar.ArabicStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+
+/**
+ * Factory for {@link ArabicStemFilter}. Example Solr field type:
+ * <pre>{@code
+ * <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.ArabicNormalizationFilterFactory"/>
+ *     <filter class="solr.ArabicStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>}</pre>
+ */
+public class ArabicStemFilterFactory extends TokenFilterFactory {
+
+  /** Wraps {@code input} in a new {@link ArabicStemFilter}. */
+  @Override
+  public ArabicStemFilter create(TokenStream input) {
+    return new ArabicStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.bg;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.bg.BulgarianStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link BulgarianStemFilter}; {@link #create(TokenStream)} wraps its input in one.
+ * <pre>{@code
+ * <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.BulgarianStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>}</pre>
+ */
+public class BulgarianStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new BulgarianStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.br;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.br.BrazilianStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link BrazilianStemFilter}. Example Solr field type:
+ * <pre>{@code
+ * <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.BrazilianStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>}</pre>
+ */
+public class BrazilianStemFilterFactory extends TokenFilterFactory {
+  /** Wraps {@code in} in a new {@link BrazilianStemFilter}. */
+  @Override
+  public BrazilianStemFilter create(TokenStream in) {
+    return new BrazilianStemFilter(in);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterFactory.java (working copy)
@@ -0,0 +1,70 @@
+package org.apache.lucene.analysis.charfilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+
+import java.io.Reader;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Factory for {@link HTMLStripCharFilter}.
+ *
+ * <fieldType name="text_html" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <charFilter class="solr.HTMLStripCharFilterFactory" escapedTags="a, title" />
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+ public class HTMLStripCharFilterFactory extends CharFilterFactory {
+
+ Set
+ * <fieldType name="text_map" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ *
+ * @since Solr 1.4
+ *
+ */
+public class MappingCharFilterFactory extends CharFilterFactory implements
+ ResourceLoaderAware, MultiTermAwareComponent {
+
+ protected NormalizeCharMap normMap;
+ private String mapping;
+
+ public void inform(ResourceLoader loader) {
+ mapping = args.get( "mapping" );
+
+ if( mapping != null ){
+ List
+ * <fieldType name="text_cjk" class="solr.TextField">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.CJKWidthFilterFactory"/>
+ * <filter class="solr.LowerCaseFilterFactory"/>
+ * <filter class="solr.CJKBigramFilterFactory"
+ * han="true" hiragana="true"
+ * katakana="true" hangul="true" />
+ * </analyzer>
+ * </fieldType>
+ */
+public class CJKBigramFilterFactory extends TokenFilterFactory {
+ int flags;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.CJKTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ * @deprecated
+ */
+@Deprecated
+public class CJKTokenizerFactory extends TokenizerFactory {
+  /** Creates a new {@link CJKTokenizer} that reads from {@code in}. */
+  @Override
+  public CJKTokenizer create(Reader in) {
+    return new CJKTokenizer(in);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizerFactory.java (revision 1365593)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizerFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java (working copy)
@@ -0,0 +1,50 @@
+package org.apache.lucene.analysis.cjk;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.cjk.CJKWidthFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Creates {@link CJKWidthFilter} instances. Example Solr field type:
+ * <pre>{@code
+ * <fieldType name="text_cjk" class="solr.TextField">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.CJKWidthFilterFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.CJKBigramFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>}</pre>
+ */
+public class CJKWidthFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  /** This factory acts as its own multi-term component. */
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+
+  /** Wraps {@code input} in a new {@link CJKWidthFilter}. */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new CJKWidthFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilterFactory.java (working copy)
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis.cn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.cn.ChineseFilter;
+import org.apache.lucene.analysis.core.StopFilterFactory; // javadocs
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link ChineseFilter}.
+ * @deprecated Use {@link StopFilterFactory} instead.
+ */
+@Deprecated
+public class ChineseFilterFactory extends TokenFilterFactory {
+  /** Wraps {@code in} in a new {@link ChineseFilter}. */
+  @Override
+  public ChineseFilter create(TokenStream in) {
+    return new ChineseFilter(in);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilterFactory.java (revision 1365593)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizerFactory.java (working copy)
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.cn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.cn.ChineseTokenizer;
+import org.apache.lucene.analysis.standard.StandardTokenizerFactory; // javadocs
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+/**
+ * Factory for {@link ChineseTokenizer}.
+ * @deprecated Use {@link StandardTokenizerFactory} instead.
+ */
+@Deprecated
+public class ChineseTokenizerFactory extends TokenizerFactory {
+  /** Creates a new {@link ChineseTokenizer} that reads from {@code in}. */
+  @Override
+  public ChineseTokenizer create(Reader in) {
+    return new ChineseTokenizer(in);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizerFactory.java (revision 1365593)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizerFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java (working copy)
@@ -0,0 +1,83 @@
+package org.apache.lucene.analysis.commongrams;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.util.*;
+
+/**
+ * Constructs a {@link CommonGramsFilter}. This is pretty close to a straight copy
+ * from {@link org.apache.lucene.analysis.core.StopFilterFactory}.
+ * <pre>{@code
+ * <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
+ *   </analyzer>
+ * </fieldType>}</pre>
+ */
+public class CommonGramsFilterFactory extends TokenFilterFactory implements
+    ResourceLoaderAware {
+
+  // Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
+  private CharArraySet commonWords;
+  private boolean ignoreCase;
+
+  /** Reads the "words", "ignoreCase" and "format" args; uses {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET} when "words" is absent. */
+  @Override
+  public void inform(ResourceLoader loader) {
+    String commonWordFiles = args.get("words");
+    ignoreCase = getBoolean("ignoreCase", false);
+    if (commonWordFiles != null) {
+      try {
+        if ("snowball".equalsIgnoreCase(args.get("format"))) {
+          commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
+        } else {
+          commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
+        }
+      } catch (IOException e) {
+        throw new InitializationException("IOException thrown while loading common word file", e);
+      }
+    } else {
+      commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+    }
+  }
+
+  /** Returns the {@code ignoreCase} value read by {@link #inform}. */
+  public boolean isIgnoreCase() {
+    return ignoreCase;
+  }
+
+  /** Returns the common-word set chosen by {@link #inform}. */
+  public CharArraySet getCommonWords() {
+    return commonWords;
+  }
+
+  /** Wraps {@code input} in a {@link CommonGramsFilter} built with the current common-word set. */
+  @Override
+  public CommonGramsFilter create(TokenStream input) {
+    return new CommonGramsFilter(luceneMatchVersion, input, commonWords);
+  }
+}
+
+
+
\ No newline at end of file
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilterFactory.java (working copy)
@@ -0,0 +1,95 @@
+package org.apache.lucene.analysis.commongrams;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilterFactory;
+import org.apache.lucene.analysis.util.*;
+
+/**
+ * Construct {@link CommonGramsQueryFilter}.
+ *
+ * This is pretty close to a straight copy from {@link StopFilterFactory}.
+ *
+ *
+ * <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class CommonGramsQueryFilterFactory extends TokenFilterFactory
+ implements ResourceLoaderAware {
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
+ * minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ private CharArraySet dictionary;
+ private String dictFile;
+ private int minWordSize;
+ private int minSubwordSize;
+ private int maxSubwordSize;
+ private boolean onlyLongestMatch;
+ @Override
+ public void init(Map
+ *
+ * hyphenator (mandatory): path to the FOP xml hyphenation pattern.
+ * See http://offo.sourceforge.net/hyphenation/.
+ * encoding (optional): encoding of the xml hyphenation file. defaults to UTF-8.
+ * dictionary (optional): dictionary of words. defaults to no dictionary.
+ * minWordSize (optional): minimal word length that gets decomposed. defaults to 5.
+ * minSubwordSize (optional): minimum length of subwords. defaults to 2.
+ * maxSubwordSize (optional): maximum length of subwords. defaults to 15.
+ * onlyLongestMatch (optional): if true, adds only the longest matching subword
+ * to the stream. defaults to false.
+ *
+ * <fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
+ * dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ * @see HyphenationCompoundWordTokenFilter
+ */
+public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ private CharArraySet dictionary;
+ private HyphenationTree hyphenator;
+ private String dictFile;
+ private String hypFile;
+ private String encoding;
+ private int minWordSize;
+ private int minSubwordSize;
+ private int maxSubwordSize;
+ private boolean onlyLongestMatch;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.KeywordTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class KeywordTokenizerFactory extends TokenizerFactory {
+  /** Creates a new {@link KeywordTokenizer} that reads from {@code input}. */
+  @Override
+  public KeywordTokenizer create(Reader input) {
+    return new KeywordTokenizer(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.core;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Factory for {@link LetterTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.LetterTokenizerFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class LetterTokenizerFactory extends TokenizerFactory {
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.LowerCaseFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+ @Override
+ public void init(Map
+ * <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
+ @Override
+ public void init(Map
+ * <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.StopFilterFactory" ignoreCase="true"
+ * words="stopwords.txt" enablePositionIncrements="true"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+
+ @Override
+ public void init(Map
+ * <fieldType name="chars" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt"
+ * enablePositionIncrements="true" useWhiteList="false"/>
+ * </analyzer>
+ * </fieldType>
+ */
+public class TypeTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+
+ @Override
+ public void inform(ResourceLoader loader) {
+ String stopTypesFiles = args.get("types");
+ enablePositionIncrements = getBoolean("enablePositionIncrements", false);
+ useWhitelist = getBoolean("useWhitelist", false);
+ if (stopTypesFiles != null) {
+ try {
+ List
+ * <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class WhitespaceTokenizerFactory extends TokenizerFactory {
+ @Override
+ public void init(Map
+ * <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.LowerCaseFilterFactory"/>
+ * <filter class="solr.CzechStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ */
+public class CzechStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) { // wraps the given stream in a new CzechStemFilter
+ return new CzechStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GermanLightStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GermanLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * {@link #create(TokenStream)} wraps the given stream in a new {@link GermanLightStemFilter}.
+ */
+public class GermanLightStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new GermanLightStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GermanMinimalStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GermanMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * {@link #create(TokenStream)} wraps the given stream in a new {@link GermanMinimalStemFilter}.
+ */
+public class GermanMinimalStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new GermanMinimalStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanNormalizationFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GermanNormalizationFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_denorm" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GermanNormalizationFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class GermanNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+
+ public TokenStream create(TokenStream input) { // wraps the given stream in a new GermanNormalizationFilter
+ return new GermanNormalizationFilter(input);
+ }
+
+ @Override
+ public AbstractAnalysisFactory getMultiTermComponent() {
+ return this; // this factory serves as its own multi-term component
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.de;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.de.GermanStemFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GermanStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GermanStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * {@link #create(TokenStream)} wraps the given stream in a new {@link GermanStemFilter}.
+ */
+public class GermanStemFilterFactory extends TokenFilterFactory {
+ public GermanStemFilter create(TokenStream in) {
+ return new GermanStemFilter(in);
+ }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java (working copy)
@@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.el;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GreekLowerCaseFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.GreekLowerCaseFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.GreekLowerCaseFilterFactory"/>
+ * <filter class="solr.GreekStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class GreekStemFilterFactory extends TokenFilterFactory {
+
+ public TokenStream create(TokenStream input) { // wraps the given stream in a new GreekStemFilter
+ return new GreekStemFilter(input);
+ }
+
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.en;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link EnglishMinimalStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.EnglishMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * {@link #create(TokenStream)} wraps the given stream in a new {@link EnglishMinimalStemFilter}.
+ */
+public class EnglishMinimalStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new EnglishMinimalStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java (working copy)
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.en;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link EnglishPossessiveFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.EnglishPossessiveFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class EnglishPossessiveFilterFactory extends TokenFilterFactory {
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.LowerCaseFilterFactory"/>
+ * <filter class="solr.PorterStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class PorterStemFilterFactory extends TokenFilterFactory {
+ public PorterStemFilter create(TokenStream input) { // wraps the given stream in a new PorterStemFilter
+ return new PorterStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.es;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.es.SpanishLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link SpanishLightStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.SpanishLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * {@link #create(TokenStream)} wraps the given stream in a new {@link SpanishLightStemFilter}.
+ */
+public class SpanishLightStemFilterFactory extends TokenFilterFactory {
+ public TokenStream create(TokenStream input) {
+ return new SpanishLightStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (working copy)
@@ -0,0 +1,50 @@
+package org.apache.lucene.analysis.fa;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.fa.PersianCharFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+
+/**
+ * Factory for {@link PersianCharFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * {@link #create(Reader)} wraps the reader in a new {@link PersianCharFilter}; {@link #getMultiTermComponent()} returns this factory.
+ */
+public class PersianCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
+
+ @Override
+ public CharFilter create(Reader input) {
+ return new PersianCharFilter(input);
+ }
+
+ @Override
+ public AbstractAnalysisFactory getMultiTermComponent() {
+ return this;
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.fa;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link PersianNormalizationFilter}; it is also its own multi-term component.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.PersianNormalizationFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class PersianNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  @Override // wraps the incoming stream in a new PersianNormalizationFilter
+  public PersianNormalizationFilter create(TokenStream input) {
+    return new PersianNormalizationFilter(input);
+  }
+
+  @Override // hands back this factory itself
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.fi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link FinnishLightStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.FinnishLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class FinnishLightStemFilterFactory extends TokenFilterFactory {
+  @Override // wraps the incoming stream in a new FinnishLightStemFilter
+  public TokenStream create(TokenStream input) {
+    return new FinnishLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (working copy)
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.util.*;
+
+import java.io.IOException;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Factory for {@link ElisionFilter}, optionally configured with an "articles" word-set file.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ElisionFilterFactory"
+ *       articles="stopwordarticles.txt" ignoreCase="true"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  /** Word set read from the "articles" file; stays null when that argument is not given. */
+  private CharArraySet articles;
+
+  @Override // loads the optional "articles" word set through the given loader
+  public void inform(ResourceLoader loader) {
+    String articlesFile = args.get("articles");
+    boolean ignoreCase = getBoolean("ignoreCase", false);
+    if (articlesFile != null) {
+      try {
+        articles = getWordSet(loader, articlesFile, ignoreCase);
+      } catch (IOException e) {
+        throw new InitializationException("IOException thrown while loading articles", e);
+      }
+    }
+  }
+
+  @Override // uses the filter's two-argument constructor when no articles were loaded
+  public ElisionFilter create(TokenStream input) {
+    assureMatchVersion();
+    return articles == null ? new ElisionFilter(luceneMatchVersion, input) :
+        new ElisionFilter(luceneMatchVersion, input, articles);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link FrenchLightStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ElisionFilterFactory"/&gt;
+ *     &lt;filter class="solr.FrenchLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class FrenchLightStemFilterFactory extends TokenFilterFactory {
+  @Override // wraps the incoming stream in a new FrenchLightStemFilter
+  public TokenStream create(TokenStream input) {
+    return new FrenchLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.fr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link FrenchMinimalStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ElisionFilterFactory"/&gt;
+ *     &lt;filter class="solr.FrenchMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class FrenchMinimalStemFilterFactory extends TokenFilterFactory {
+  @Override // wraps the incoming stream in a new FrenchMinimalStemFilter
+  public TokenStream create(TokenStream input) {
+    return new FrenchMinimalStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (working copy)
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.ga;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Creates {@link IrishLowerCaseFilter} instances; usable as its own multi-term component.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.IrishLowerCaseFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class IrishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+
+  // Handing back this same factory 'mostly works', special cases aside, like most other filters.
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+
+  /** Wraps {@code input} in a new {@link IrishLowerCaseFilter}. */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new IrishLowerCaseFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.gl;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.gl.GalicianMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GalicianMinimalStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_glplural" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GalicianMinimalStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class GalicianMinimalStemFilterFactory extends TokenFilterFactory {
+  @Override // wraps the incoming stream in a new GalicianMinimalStemFilter
+  public TokenStream create(TokenStream input) {
+    return new GalicianMinimalStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.gl;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.gl.GalicianStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link GalicianStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.GalicianStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class GalicianStemFilterFactory extends TokenFilterFactory {
+  @Override // wraps the incoming stream in a new GalicianStemFilter
+  public TokenStream create(TokenStream input) {
+    return new GalicianStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (working copy)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.hi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link HindiNormalizationFilter}; it is also its own multi-term component.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.HindiNormalizationFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class HindiNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  @Override // wraps the incoming stream in a new HindiNormalizationFilter
+  public TokenStream create(TokenStream input) {
+    return new HindiNormalizationFilter(input);
+  }
+
+  @Override // hands back this factory itself
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.hi;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hi.HindiStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link HindiStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.HindiStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class HindiStemFilterFactory extends TokenFilterFactory {
+  @Override // wraps the incoming stream in a new HindiStemFilter
+  public TokenStream create(TokenStream input) {
+    return new HindiStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.hu;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link HungarianLightStemFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.HungarianLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class HungarianLightStemFilterFactory extends TokenFilterFactory {
+  @Override // wraps the incoming stream in a new HungarianLightStemFilter
+  public TokenStream create(TokenStream input) {
+    return new HungarianLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (working copy)
@@ -0,0 +1,121 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hunspell.HunspellDictionary;
+import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * TokenFilterFactory that creates instances of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}.
+ * Example config for British English including a custom dictionary, case insensitive matching:
+ *
+ * <filter class="solr.HunspellStemFilterFactory"
+ * dictionary="en_GB.dic,my_custom.dic"
+ * affix="en_GB.aff"
+ * ignoreCase="true" />
+ * Both parameters dictionary and affix are mandatory.
+ *
+ * The parameter ignoreCase (true/false) controls whether matching is case sensitive or not. Default false.
+ *
+ * The parameter strictAffixParsing (true/false) controls whether the affix parsing is strict or not. Default true.
+ * If strict, an error while reading an affix rule causes a ParseException; otherwise the error is ignored.
+ *
+ * Dictionaries for many languages are available through the OpenOffice project.
+ *
+ * See http://wiki.apache.org/solr/Hunspell
+ */
+public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+
+ private static final String PARAM_DICTIONARY = "dictionary";
+ private static final String PARAM_AFFIX = "affix";
+ private static final String PARAM_IGNORE_CASE = "ignoreCase";
+ private static final String PARAM_STRICT_AFFIX_PARSING = "strictAffixParsing";
+ private static final String TRUE = "true";
+ private static final String FALSE = "false";
+
+ private HunspellDictionary dictionary;
+ private boolean ignoreCase = false;
+
+ /**
+ * Loads the hunspell dictionary and affix files defined in the configuration
+ *
+ * @param loader ResourceLoader used to load the files
+ */
+ public void inform(ResourceLoader loader) {
+ assureMatchVersion();
+ String dictionaryArg = args.get(PARAM_DICTIONARY);
+ if (dictionaryArg == null) {
+ throw new InitializationException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
+ }
+ String dictionaryFiles[] = args.get(PARAM_DICTIONARY).split(",");
+ String affixFile = args.get(PARAM_AFFIX);
+ String pic = args.get(PARAM_IGNORE_CASE);
+ if(pic != null) {
+ if(pic.equalsIgnoreCase(TRUE)) ignoreCase = true;
+ else if(pic.equalsIgnoreCase(FALSE)) ignoreCase = false;
+ else throw new InitializationException("Unknown value for " + PARAM_IGNORE_CASE + ": " + pic + ". Must be true or false");
+ }
+
+ String strictAffixParsingParam = args.get(PARAM_STRICT_AFFIX_PARSING);
+ boolean strictAffixParsing = true;
+ if(strictAffixParsingParam != null) {
+ if(strictAffixParsingParam.equalsIgnoreCase(FALSE)) strictAffixParsing = false;
+ else if(strictAffixParsingParam.equalsIgnoreCase(TRUE)) strictAffixParsing = true;
+ else throw new InitializationException("Unknown value for " + PARAM_STRICT_AFFIX_PARSING + ": " + strictAffixParsingParam + ". Must be true or false");
+ }
+
+ InputStream affix = null;
+ List
+ * <fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.LowerCaseFilterFactory"/>
+ * <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class IndonesianStemFilterFactory extends TokenFilterFactory {
+ private boolean stemDerivational = true;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.IndicNormalizationFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class IndicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  /** Wraps {@code input} in a new {@link IndicNormalizationFilter}. */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new IndicNormalizationFilter(input);
+  }
+
+  /** Returns this factory itself as its multi-term component. */
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.it;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.it.ItalianLightStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link ItalianLightStemFilter}; {@link #create} wraps its input in a new filter.
+ * Example Solr field type: <pre>
+ * &lt;fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.ItalianLightStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ItalianLightStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new ItalianLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (working copy)
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.lv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.lv.LatvianStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link LatvianStemFilter}. Example Solr field type: <pre>
+ * &lt;fieldType name="text_lvstem" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.LatvianStemFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class LatvianStemFilterFactory extends TokenFilterFactory {
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new LatvianStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Factory for {@link ASCIIFoldingFilter}; the factory also acts as its own multi-term component.
+ * Example Solr field type: <pre>
+ * &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.ASCIIFoldingFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+  @Override
+  public ASCIIFoldingFilter create(TokenStream input) {
+    return new ASCIIFoldingFilter(input);
+  }
+
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilterFactory.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (working copy)
@@ -0,0 +1,140 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.CapitalizationFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+/**
+ * Factory for {@link CapitalizationFilter}.
+ *
+ * The factory takes parameters:
+ * "onlyFirstWord" - should only the first word be capitalized, or all of the words?
+ * "keep" - a keep word list. Each word that should be kept separated by whitespace.
+ * "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive.
+ * "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list
+ * "okPrefix" - do not change word capitalization if a word begins with something in this list.
+ * for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
+ * "Mckinley"
+ * "minWordLength" - how long the word needs to be to get capitalization applied. If the
+ * minWordLength is 3, "and" > "And" but "or" stays "or"
+ * "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
+ * assumed to be correct.
+ *
+ *
+ * <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
+ * keep="java solr lucene" keepIgnoreCase="false"
+ * okPrefix="McK McD McA"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ *
+ * @since solr 1.3
+ */
+public class CapitalizationFilterFactory extends TokenFilterFactory {
+ public static final String KEEP = "keep";
+ public static final String KEEP_IGNORE_CASE = "keepIgnoreCase";
+ public static final String OK_PREFIX = "okPrefix";
+ public static final String MIN_WORD_LENGTH = "minWordLength";
+ public static final String MAX_WORD_COUNT = "maxWordCount";
+ public static final String MAX_TOKEN_LENGTH = "maxTokenLength";
+ public static final String ONLY_FIRST_WORD = "onlyFirstWord";
+ public static final String FORCE_FIRST_LETTER = "forceFirstLetter";
+
+ //Map
+ * <fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HyphenatedWordsFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class HyphenatedWordsFilterFactory extends TokenFilterFactory {
+  /** Wraps {@code input} in a new {@link HyphenatedWordsFilter}. */
+  @Override
+  public HyphenatedWordsFilter create(TokenStream input) {
+    return new HyphenatedWordsFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilterFactory.java
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (working copy)
@@ -0,0 +1,96 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
+
+import java.util.Map;
+import java.util.Set;
+import java.io.IOException;
+
+/**
+ * Factory for {@link KeepWordFilter}.
+ *
+ * <fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false" enablePositionIncrements="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" ignoreCase="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class KeywordMarkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  /** Argument key whose value names the protected-words resource(s) handed to {@code getWordSet}. */
+  public static final String PROTECTED_TOKENS = "protected";
+  private CharArraySet protectedWords;
+  private boolean ignoreCase;
+
+  /**
+   * Reads the {@code ignoreCase} argument (default {@code false}) and, when the
+   * {@code protected} argument is present, loads the protected word set through {@code loader}.
+   * When the argument is absent no word set is loaded.
+   *
+   * @param loader resource loader passed on to {@code getWordSet}
+   * @throws InitializationException if loading the word set fails with an {@link IOException}
+   */
+  @Override
+  public void inform(ResourceLoader loader) {
+    String wordFiles = args.get(PROTECTED_TOKENS);
+    ignoreCase = getBoolean("ignoreCase", false);
+    if (wordFiles != null) {
+      try {
+        protectedWords = getWordSet(loader, wordFiles, ignoreCase);
+      } catch (IOException e) {
+        throw new InitializationException("IOException thrown while loading protected words", e);
+      }
+    }
+  }
+
+  /** Returns the {@code ignoreCase} setting read by {@link #inform(ResourceLoader)}. */
+  public boolean isIgnoreCase() {
+    return ignoreCase;
+  }
+
+  /**
+   * Returns {@code input} unchanged when no protected words were loaded; otherwise wraps it in a
+   * {@link KeywordMarkerFilter} over the loaded word set.
+   */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return protectedWords == null ? input : new KeywordMarkerFilter(input, protectedWords);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (working copy)
@@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.LengthFilter;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+import java.util.Map;
+
+/**
+ * Factory for {@link LengthFilter}.
+ *
+ * <fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.LengthFilterFactory" min="0" max="1" enablePositionIncrements="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class LengthFilterFactory extends TokenFilterFactory {
+ int min,max;
+ boolean enablePositionIncrements;
+ public static final String MIN_KEY = "min";
+ public static final String MAX_KEY = "max";
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class LimitTokenCountFilterFactory extends TokenFilterFactory {
+
+ int maxTokenCount;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class RemoveDuplicatesTokenFilterFactory extends TokenFilterFactory {
+  /** Wraps {@code input} in a new {@link RemoveDuplicatesTokenFilter}. */
+  @Override
+  public RemoveDuplicatesTokenFilter create(TokenStream input) {
+    return new RemoveDuplicatesTokenFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (working copy)
@@ -0,0 +1,73 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+import org.apache.lucene.analysis.util.*;
+
+/**
+ * Factory for {@link StemmerOverrideFilter}.
+ *
+ * <fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class StemmerOverrideFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ private CharArrayMap
+ * <fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.NGramTokenizerFactory"/>
+ * <filter class="solr.TrimFilterFactory" updateOffsets="false"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ * @see TrimFilter
+ */
+public class TrimFilterFactory extends TokenFilterFactory {
+
+ protected boolean updateOffsets = false;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.WordDelimiterFilterFactory" protected="protectedword.txt"
+ * preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
+ * catenateWords="0" catenateNumbers="0" catenateAll="0"
+ * generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"
+ * types="wdfftypes.txt" />
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class WordDelimiterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ public static final String PROTECTED_TOKENS = "protected";
+ public static final String TYPES = "types";
+
+ public void inform(ResourceLoader loader) {
+ String wordFiles = args.get(PROTECTED_TOKENS);
+ if (wordFiles != null) {
+ try {
+ protectedWords = getWordSet(loader, wordFiles, false);
+ } catch (IOException e) {
+ throw new InitializationException("IOException thrown while loading protected words", e);
+ }
+ }
+ String types = args.get(TYPES);
+ if (types != null) {
+ try {
+ List
+ * <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class EdgeNGramFilterFactory extends TokenFilterFactory {
+ private int maxGramSize = 0;
+
+ private int minGramSize = 0;
+
+ private String side;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class EdgeNGramTokenizerFactory extends TokenizerFactory {
+ private int maxGramSize = 0;
+
+ private int minGramSize = 0;
+
+ private String side;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class NGramFilterFactory extends TokenFilterFactory {
+ private int maxGramSize = 0;
+
+ private int minGramSize = 0;
+
+ /** Initializes the n-gram min and max sizes. */
+ @Override
+ public void init(Map
+ * <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class NGramTokenizerFactory extends TokenizerFactory {
+ private int maxGramSize = 0;
+ private int minGramSize = 0;
+
+ /** Initializes the n-gram min and max sizes. */
+ @Override
+ public void init(Map
+ * <fieldType name="text_nolgtstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.LowerCaseFilterFactory"/>
+ * <filter class="solr.NorwegianLightStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ */
+public class NorwegianLightStemFilterFactory extends TokenFilterFactory {
+  /** Wraps {@code input} in a new {@link NorwegianLightStemFilter}. */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new NorwegianLightStemFilter(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (working copy)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.no;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link NorwegianMinimalStemFilter}.
+ *
+ * <fieldType name="text_nominstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.LowerCaseFilterFactory"/>
+ * <filter class="solr.NorwegianMinimalStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ */
+public class NorwegianMinimalStemFilterFactory extends TokenFilterFactory {
+ /**
+ * Wraps the given stream in a {@link NorwegianMinimalStemFilter}.
+ *
+ * @param input the token stream to be filtered
+ * @return a new {@link NorwegianMinimalStemFilter} reading from {@code input}
+ */
+ @Override
+ public TokenStream create(TokenStream input) {
+ return new NorwegianMinimalStemFilter(input);
+ }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (working copy)
@@ -0,0 +1,98 @@
+package org.apache.lucene.analysis.path;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
+import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
+import org.apache.lucene.analysis.util.InitializationException;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+/**
+ * Factory for {@link PathHierarchyTokenizer}.
+ *
+ * <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class PathHierarchyTokenizerFactory extends TokenizerFactory {
+
+ private char delimiter;
+ private char replacement;
+ private boolean reverse = false;
+ private int skip = PathHierarchyTokenizer.DEFAULT_SKIP;
+
+ /**
+ * Require a configured pattern
+ */
+ @Override
+ public void init(Map
+ * <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <charFilter class="solr.PatternReplaceCharFilterFactory"
+ * pattern="([^a-z])" replacement=""/>
+ * <tokenizer class="solr.KeywordTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ *
+ * @since Solr 3.1
+ */
+public class PatternReplaceCharFilterFactory extends CharFilterFactory {
+
+ private Pattern p;
+ private String replacement;
+ private int maxBlockChars;
+ private String blockDelimiters;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.KeywordTokenizerFactory"/>
+ * <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement=""
+ * replace="all"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ * @see PatternReplaceFilter
+ */
+public class PatternReplaceFilterFactory extends TokenFilterFactory {
+ Pattern p;
+ String replacement;
+ boolean all = true;
+
+ @Override
+ public void init(Map
+ *
+ *
+ *
+ * pattern = \'([^\']+)\'
+ * group = 0
+ * input = aaa 'bbb' 'ccc'
+ *
+ * the output will be two tokens: 'bbb' and 'ccc' (including the ' marks). With the same input
+ * but using group=1, the output would be: bbb and ccc (no ' marks)
+ *
NOTE: This Tokenizer does not output tokens that are of zero length.
+ * + *+ * <fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/> + * </analyzer> + * </fieldType>+ * + * @see PatternTokenizer + * @since solr1.2 + * + */ +public class PatternTokenizerFactory extends TokenizerFactory +{ + public static final String PATTERN = "pattern"; + public static final String GROUP = "group"; + + protected Pattern pattern; + protected int group; + + /** + * Require a configured pattern + */ + @Override + public void init(Map
+ * <fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/> + * </analyzer> + * </fieldType>+ * + * + */ +public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { + public static final String ENCODER_ATTR = "encoder"; + public static final String DELIMITER_ATTR = "delimiter"; + + private PayloadEncoder encoder; + private char delimiter = '|'; + + public DelimitedPayloadTokenFilter create(TokenStream input) { + return new DelimitedPayloadTokenFilter(input, delimiter, encoder); + } + + @Override + public void init(Map
+ * <fieldType name="text_numpayload" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/> + * </analyzer> + * </fieldType>+ * + */ +public class NumericPayloadTokenFilterFactory extends TokenFilterFactory { + private float payload; + private String typeMatch; + @Override + public void init(Map
+ * <fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.TokenOffsetPayloadTokenFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class TokenOffsetPayloadTokenFilterFactory extends TokenFilterFactory { + public TokenOffsetPayloadTokenFilter create(TokenStream input) { + return new TokenOffsetPayloadTokenFilter(input); + } +} + Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterFactory.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (working copy) @@ -0,0 +1,40 @@ +package org.apache.lucene.analysis.payloads; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +/** + * Factory for {@link TypeAsPayloadTokenFilter}. + *
+ * <fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.TypeAsPayloadTokenFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class TypeAsPayloadTokenFilterFactory extends TokenFilterFactory { + public TypeAsPayloadTokenFilter create(TokenStream input) { + return new TypeAsPayloadTokenFilter(input); + } +} + Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterFactory.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (working copy) @@ -0,0 +1,55 @@ +package org.apache.lucene.analysis.position; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.position.PositionFilter; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +import java.util.Map; + +/** + * Factory for {@link PositionFilter}. + * Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its + * original positionIncrement value. The default positionIncrement value is zero. + *
+ * <fieldType name="text_position" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PositionFilterFactory" positionIncrement="0"/> + * </analyzer> + * </fieldType>+ * + * + * @see org.apache.lucene.analysis.position.PositionFilter + * @since solr 1.4 + */ +public class PositionFilterFactory extends TokenFilterFactory { + private int positionIncrement; + + @Override + public void init(Map
+ * <fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.PortugueseLightStemFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class PortugueseLightStemFilterFactory extends TokenFilterFactory { + public TokenStream create(TokenStream input) { + return new PortugueseLightStemFilter(input); + } +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (working copy) @@ -0,0 +1,40 @@ +package org.apache.lucene.analysis.pt; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +/** + * Factory for {@link PortugueseMinimalStemFilter}. + *
+ * <fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.PortugueseMinimalStemFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class PortugueseMinimalStemFilterFactory extends TokenFilterFactory { + public TokenStream create(TokenStream input) { + return new PortugueseMinimalStemFilter(input); + } +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (working copy) @@ -0,0 +1,40 @@ +package org.apache.lucene.analysis.pt; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.pt.PortugueseStemFilter; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +/** + * Factory for {@link PortugueseStemFilter}. + *
+ * <fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.PortugueseStemFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class PortugueseStemFilterFactory extends TokenFilterFactory { + public TokenStream create(TokenStream input) { + return new PortugueseStemFilter(input); + } +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (working copy) @@ -0,0 +1,43 @@ +package org.apache.lucene.analysis.reverse; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.reverse.ReverseStringFilter; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +/** + * Factory for {@link ReverseStringFilter}. + *
+ * <fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.ReverseStringFilterFactory"/> + * </analyzer> + * </fieldType>+ * + * + * @since solr 1.4 + */ +public class ReverseStringFilterFactory extends TokenFilterFactory { + public ReverseStringFilter create(TokenStream in) { + assureMatchVersion(); + return new ReverseStringFilter(luceneMatchVersion,in); + } +} + Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizerFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizerFactory.java (working copy) @@ -0,0 +1,48 @@ +package org.apache.lucene.analysis.ru; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.util.Map; + +import org.apache.lucene.analysis.ru.RussianLetterTokenizer; +import org.apache.lucene.analysis.standard.StandardTokenizerFactory; // javadocs +import org.apache.lucene.analysis.util.InitializationException; +import org.apache.lucene.analysis.util.TokenizerFactory; + +/** @deprecated Use {@link StandardTokenizerFactory} instead. + * This tokenizer has no Russian-specific functionality. + */ +@Deprecated +public class RussianLetterTokenizerFactory extends TokenizerFactory { + + @Override + public void init(Map
+ * <fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.RussianLightStemFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class RussianLightStemFilterFactory extends TokenFilterFactory { + public TokenStream create(TokenStream input) { + return new RussianLightStemFilter(input); + } +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (working copy) @@ -0,0 +1,80 @@ +package org.apache.lucene.analysis.shingle; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.shingle.ShingleFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.InitializationException; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +import java.util.Map; + +/** + * Factory for {@link ShingleFilter}. + *
+ * <fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2" + * outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/> + * </analyzer> + * </fieldType>+ * + */ +public class ShingleFilterFactory extends TokenFilterFactory { + private int minShingleSize; + private int maxShingleSize; + private boolean outputUnigrams; + private boolean outputUnigramsIfNoShingles; + private String tokenSeparator; + + @Override + public void init(Map
+ * Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection. + *
+ * <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/> + * </analyzer> + * </fieldType>+ * + * + */ +public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { + public static final String PROTECTED_TOKENS = "protected"; + + private String language = "English"; + private Class> stemClass; + + + public void inform(ResourceLoader loader) { + String wordFiles = args.get(PROTECTED_TOKENS); + if (wordFiles != null) { + try { + protectedWords = getWordSet(loader, wordFiles, false); + } catch (IOException e) { + throw new InitializationException("IOException thrown while loading protected words", e); + } + } + } + + private CharArraySet protectedWords = null; + + @Override + public void init(Map
+ * <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.ClassicTokenizerFactory"/> + * <filter class="solr.ClassicFilterFactory"/> + * </analyzer> + * </fieldType>+ * + * + */ +public class ClassicFilterFactory extends TokenFilterFactory { + public TokenFilter create(TokenStream input) { + return new ClassicFilter(input); + } +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (working copy) @@ -0,0 +1,57 @@ +package org.apache.lucene.analysis.standard; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.ClassicTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.util.TokenizerFactory; + +import java.io.Reader; +import java.util.Map; + +/** + * Factory for {@link ClassicTokenizer}. + *
+ * <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/> + * </analyzer> + * </fieldType>+ * + * + */ + +public class ClassicTokenizerFactory extends TokenizerFactory { + + private int maxTokenLength; + + @Override + public void init(Map
+ * <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.StandardFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class StandardFilterFactory extends TokenFilterFactory { + @Override + public void init(Map
+ * <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/> + * </analyzer> + * </fieldType>+ * + */ + +public class StandardTokenizerFactory extends TokenizerFactory { + + private int maxTokenLength; + + @Override + public void init(Map
+ * <fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/> + * </analyzer> + * </fieldType>+ * + * + */ + +public class UAX29URLEmailTokenizerFactory extends TokenizerFactory { + + private int maxTokenLength; + + @Override + public void init(Map
+ * <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.SwedishLightStemFilterFactory"/> + * </analyzer> + * </fieldType>+ * + */ +public class SwedishLightStemFilterFactory extends TokenFilterFactory { + public TokenStream create(TokenStream input) { + return new SwedishLightStemFilter(input); + } +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (revision 1365586) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +HeadURL \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/FSTSynonymFilterFactory.java (working copy) @@ -0,0 +1,159 @@ +package org.apache.lucene.analysis.synonym; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.text.ParseException; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.synonym.SynonymFilter; +import org.apache.lucene.analysis.synonym.SynonymMap; +import org.apache.lucene.analysis.synonym.SolrSynonymParser; +import org.apache.lucene.analysis.synonym.WordnetSynonymParser; +import org.apache.lucene.analysis.util.*; +import org.apache.lucene.util.Version; + +/** + * @deprecated (3.4) use {@link SynonymFilterFactory} instead. this is only a backwards compatibility + * mechanism that will be removed in Lucene 5.0 + */ +// NOTE: rename this to "SynonymFilterFactory" and nuke that delegator in Lucene 5.0! 
+@Deprecated +final class FSTSynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { + + private SynonymMap map; + private boolean ignoreCase; + + @Override + public TokenStream create(TokenStream input) { + // if the fst is null, it means there's actually no synonyms... just return the original stream + // as there is nothing to do here. + return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase); + } + + @Override + public void inform(ResourceLoader loader) { + final boolean ignoreCase = getBoolean("ignoreCase", false); + this.ignoreCase = ignoreCase; + + String tf = args.get("tokenizerFactory"); + + final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf); + + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader); + TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer; + return new TokenStreamComponents(tokenizer, stream); + } + }; + + String format = args.get("format"); + try { + if (format == null || format.equals("solr")) { + // TODO: expose dedup as a parameter? + map = loadSolrSynonyms(loader, true, analyzer); + } else if (format.equals("wordnet")) { + map = loadWordnetSynonyms(loader, true, analyzer); + } else { + // TODO: somehow make this more pluggable + throw new InitializationException("Unrecognized synonyms format: " + format); + } + } catch (Exception e) { + throw new InitializationException("Exception thrown while loading synonyms", e); + } + } + + /** + * Load synonyms from the solr format, "format=solr". 
+ */ + private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException { + final boolean expand = getBoolean("expand", true); + String synonyms = args.get("synonyms"); + if (synonyms == null) + throw new InitializationException("Missing required argument 'synonyms'."); + + CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + + SolrSynonymParser parser = new SolrSynonymParser(dedup, expand, analyzer); + File synonymFile = new File(synonyms); + if (synonymFile.exists()) { + decoder.reset(); + parser.add(new InputStreamReader(loader.openResource(synonyms), decoder)); + } else { + List
+ * The matched tokens from the input stream may be optionally passed through (includeOrig=true) + * or discarded. If the original tokens are included, the position increments may be modified + * to retain absolute positions after merging with the synonym tokenstream. + *
+ * Generated synonyms will start at the same position as the first matched source token.
+ * @deprecated (3.4) Use {@link SynonymFilterFactory} instead. Only for precise index backwards compatibility. This filter will be removed in Lucene 5.0.
+ */
+@Deprecated
+final class SlowSynonymFilter extends TokenFilter {
+
+ private final SlowSynonymMap map; // Map
+ * Current backslash escaping supported:
+ *
+ * This factory can be created in two ways:
+ *
+ * Using a System collator:
+ *
+ * Using a Tailored ruleset:
+ *
+ * <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
+ * expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ * @deprecated (3.4) use {@link SynonymFilterFactory} instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
+ */
+@Deprecated
+final class SlowSynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+
+ public void inform(ResourceLoader loader) {
+ String synonyms = args.get("synonyms");
+ if (synonyms == null)
+ throw new InitializationException("Missing required argument 'synonyms'.");
+ boolean ignoreCase = getBoolean("ignoreCase", false);
+ boolean expand = getBoolean("expand", true);
+
+ String tf = args.get("tokenizerFactory");
+ TokenizerFactory tokFactory = null;
+ if( tf != null ){
+ tokFactory = loadTokenizerFactory(loader, tf);
+ }
+
+ Iterable> source;
+ List
> target;
+
+ if (mapping.size() > 2) {
+ throw new InitializationException("Invalid Synonym Rule:" + rule);
+ } else if (mapping.size()==2) {
+ source = getSynList(mapping.get(0), synSep, tokFactory);
+ target = getSynList(mapping.get(1), synSep, tokFactory);
+ } else {
+ source = getSynList(mapping.get(0), synSep, tokFactory);
+ if (expansion) {
+ // expand to all arguments
+ target = source;
+ } else {
+ // reduce to first argument
+ target = new ArrayList
>(1);
+ target.add(source.get(0));
+ }
+ }
+
+ boolean includeOrig=false;
+ for (List
> getSynList(String str, String separator, TokenizerFactory tokFactory) {
+ List
> synList = new ArrayList
>();
+ for (String toks : strList) {
+ List
\n \t \r \b \f are escaped the same as a Java String
+ *
Other characters following a backslash are produced verbatim (\c => c)
+ *
+ * @param s the string to split
+ * @param separator the separator to split on
+ * @param decode decode backslash escaping
+ */
+ public static List
+ * <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
+ * format="solr" ignoreCase="false" expand="true"
+ * tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ */
+public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ private TokenFilterFactory delegator;
+
+ @Override
+ public void init(Map
+ * <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.ThaiWordFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class ThaiWordFilterFactory extends TokenFilterFactory {
+
+  /**
+   * Wraps the given stream in a {@link ThaiWordFilter}.
+   * <p>
+   * {@code assureMatchVersion()} is called before {@code luceneMatchVersion} is handed to
+   * the filter constructor (presumably it rejects a factory whose match version was never
+   * configured; confirm against the base factory class).
+   *
+   * @param input the token stream to wrap
+   * @return a new {@link ThaiWordFilter} reading from {@code input}
+   */
+  @Override
+  public ThaiWordFilter create(TokenStream input) {
+    assureMatchVersion();
+    return new ThaiWordFilter(luceneMatchVersion, input);
+  }
+}
+
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (working copy)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.tr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link TurkishLowerCaseFilter}.
+ *
+ * <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.TurkishLowerCaseFilterFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class TurkishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+
+  /**
+   * Wraps the given stream in a {@link TurkishLowerCaseFilter}.
+   *
+   * @param input the token stream to wrap
+   * @return a new {@link TurkishLowerCaseFilter} reading from {@code input}
+   */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new TurkishLowerCaseFilter(input);
+  }
+
+  /**
+   * Returns this factory itself as the component to use for multi-term analysis.
+   *
+   * @return {@code this}
+   */
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (working copy)
@@ -0,0 +1,103 @@
+package org.apache.lucene.analysis.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Map;
+import java.util.LinkedHashMap;
+import java.util.Set;
+import java.util.ServiceConfigurationError;
+
+import org.apache.lucene.util.SPIClassIterator;
+
+/**
+ * Helper class for loading named SPIs from classpath (e.g. Tokenizers, TokenStreams).
+ * @lucene.internal
+ */
+public final class AnalysisSPILoader {
+
+ private final Map clazz;
+
+ public AnalysisSPILoader(Class clazz) {
+ this(clazz, new String[] { clazz.getSimpleName() });
+ }
+
+ public AnalysisSPILoader(Class clazz, ClassLoader loader) {
+ this(clazz, new String[] { clazz.getSimpleName() }, loader);
+ }
+
+ public AnalysisSPILoader(Class clazz, String[] suffixes) {
+ this(clazz, suffixes, Thread.currentThread().getContextClassLoader());
+ }
+
+ public AnalysisSPILoader(Class clazz, String[] suffixes, ClassLoader classloader) {
+ this.clazz = clazz;
+ final SPIClassIterator loader = SPIClassIterator.get(clazz, classloader);
+ final LinkedHashMap
+ * <fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WikipediaTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ */
+public class WikipediaTokenizerFactory extends TokenizerFactory {
+
+  // TODO: add support for WikipediaTokenizer's advanced options.
+
+  /**
+   * Creates a {@link WikipediaTokenizer} over the given reader using the tokenizer's
+   * single-argument constructor; no factory arguments are consulted.
+   *
+   * @param input the character source to tokenize
+   * @return a new {@link WikipediaTokenizer} reading from {@code input}
+   */
+  @Override
+  public Tokenizer create(Reader input) {
+    return new WikipediaTokenizer(input);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (revision 1365586)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilterFactory.java (revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilterFactory.java (working copy)
@@ -0,0 +1,187 @@
+package org.apache.lucene.collation;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.text.Collator;
+import java.text.ParseException;
+import java.text.RuleBasedCollator;
+import java.util.Locale;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.collation.CollationKeyFilter;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Factory for {@link CollationKeyFilter}.
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ * <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.KeywordTokenizerFactory"/>
+ * <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/>
+ * </analyzer>
+ * </fieldType>
+ *
+ * @see Collator
+ * @see Locale
+ * @see RuleBasedCollator
+ * @since solr 3.1
+ * @deprecated use {@link CollationKeyAnalyzer} instead.
+ */
+@Deprecated
+public class CollationKeyFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent, ResourceLoaderAware {
+
+  /** Collator configured by {@link #inform(ResourceLoader)} and handed to every filter built by {@link #create(TokenStream)}. */
+  private Collator collator;
+
+  /**
+   * Builds and configures the collator from the factory arguments.
+   * <p>
+   * Arguments read: {@code custom}, {@code language}, {@code country}, {@code variant},
+   * {@code strength} and {@code decomposition}. Either {@code custom} (a rules resource) or
+   * {@code language} must be given; {@code custom} cannot be combined with any of
+   * {@code language}, {@code country} or {@code variant}.
+   *
+   * @param loader used to open the {@code custom} rules resource when one is specified
+   * @throws InitializationException if the argument combination is invalid, if
+   *         {@code strength} or {@code decomposition} has an unrecognized value, or if the
+   *         custom rules cannot be read or parsed
+   */
+  @Override
+  public void inform(ResourceLoader loader) {
+    String custom = args.get("custom");
+    String language = args.get("language");
+    String country = args.get("country");
+    String variant = args.get("variant");
+    String strength = args.get("strength");
+    String decomposition = args.get("decomposition");
+
+    if (custom == null && language == null) {
+      throw new InitializationException("Either custom or language is required.");
+    }
+
+    if (custom != null &&
+        (language != null || country != null || variant != null)) {
+      throw new InitializationException("Cannot specify both language and custom. "
+          + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. "
+          + "Then save the entire customized ruleset to a file, and use with the custom parameter");
+    }
+
+    if (language != null) {
+      // create from a system collator, based on Locale.
+      collator = createFromLocale(language, country, variant);
+    } else {
+      // create from a custom ruleset
+      collator = createFromRules(custom, loader);
+    }
+
+    // set the strength flag, otherwise it will be the default.
+    if (strength != null) {
+      if (strength.equalsIgnoreCase("primary")) {
+        collator.setStrength(Collator.PRIMARY);
+      } else if (strength.equalsIgnoreCase("secondary")) {
+        collator.setStrength(Collator.SECONDARY);
+      } else if (strength.equalsIgnoreCase("tertiary")) {
+        collator.setStrength(Collator.TERTIARY);
+      } else if (strength.equalsIgnoreCase("identical")) {
+        collator.setStrength(Collator.IDENTICAL);
+      } else {
+        throw new InitializationException("Invalid strength: " + strength);
+      }
+    }
+
+    // set the decomposition flag, otherwise it will be the default.
+    if (decomposition != null) {
+      if (decomposition.equalsIgnoreCase("no")) {
+        collator.setDecomposition(Collator.NO_DECOMPOSITION);
+      } else if (decomposition.equalsIgnoreCase("canonical")) {
+        collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
+      } else if (decomposition.equalsIgnoreCase("full")) {
+        collator.setDecomposition(Collator.FULL_DECOMPOSITION);
+      } else {
+        throw new InitializationException("Invalid decomposition: " + decomposition);
+      }
+    }
+  }
+
+  /**
+   * Wraps the given stream in a {@link CollationKeyFilter} that uses the collator
+   * configured by {@link #inform(ResourceLoader)}.
+   *
+   * @param input the token stream to wrap
+   * @return a new {@link CollationKeyFilter} reading from {@code input}
+   */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new CollationKeyFilter(input, collator);
+  }
+
+  /**
+   * Creates a locale from language, with optional country and variant, then returns
+   * the appropriate collator for that locale.
+   *
+   * @param language language code; the only caller, {@link #inform(ResourceLoader)},
+   *        invokes this method only when it is non-null
+   * @param country optional country code, may be null
+   * @param variant optional variant, may be null; requires {@code country}
+   * @return the collator for the resulting locale
+   * @throws InitializationException if a variant is given without a country
+   */
+  private Collator createFromLocale(String language, String country, String variant) {
+    if (country == null && variant != null) {
+      throw new InitializationException("To specify variant, country is required");
+    }
+
+    final Locale locale;
+    if (country == null) {
+      locale = new Locale(language);
+    } else if (variant == null) {
+      locale = new Locale(language, country);
+    } else {
+      locale = new Locale(language, country, variant);
+    }
+
+    return Collator.getInstance(locale);
+  }
+
+  /**
+   * Reads custom rules from a resource and creates a {@link RuleBasedCollator}.
+   * The file cannot support comments, as # might be in the rules!
+   *
+   * @param fileName name of the rules resource, resolved through {@code loader}
+   * @param loader resource loader used to open {@code fileName}
+   * @return a collator built from the resource's full text
+   * @throws InitializationException wrapping the {@link IOException} or
+   *         {@link ParseException} raised while loading or parsing the rules
+   */
+  private Collator createFromRules(String fileName, ResourceLoader loader) {
+    InputStream input = null;
+    try {
+      input = loader.openResource(fileName);
+      String rules = toUTF8String(input);
+      return new RuleBasedCollator(rules);
+    } catch (IOException e) {
+      // io error
+      throw new InitializationException("IOException thrown while loading rules", e);
+    } catch (ParseException e) {
+      // invalid rules
+      throw new InitializationException("ParseException thrown while parsing rules", e);
+    } finally {
+      // the stream is always released here; toUTF8String does not close it
+      IOUtils.closeWhileHandlingException(input);
+    }
+  }
+
+  /**
+   * Returns this factory itself as the component to use for multi-term analysis.
+   *
+   * @return {@code this}
+   */
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
+
+  /**
+   * Reads the whole stream into a string through a UTF-8 decoding reader.
+   * The stream is not closed here; the caller is responsible for closing it.
+   *
+   * @param in the stream to read
+   * @return all characters read from {@code in}
+   * @throws IOException if reading fails
+   */
+  private String toUTF8String(InputStream in) throws IOException {
+    StringBuilder sb = new StringBuilder();
+    char[] buffer = new char[1024];
+    Reader r = IOUtils.getDecodingReader(in, IOUtils.CHARSET_UTF_8);
+    int len;
+    while ((len = r.read(buffer)) > 0) {
+      sb.append(buffer, 0, len);
+    }
+    return sb.toString();
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilterFactory.java (revision 1365593)
+++ lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilterFactory.java (working copy)
Property changes on: lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+HeadURL
\ No newline at end of property
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory (revision 0)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory (working copy)
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory
+org.apache.lucene.analysis.charfilter.MappingCharFilterFactory
+org.apache.lucene.analysis.fa.PersianCharFilterFactory
+org.apache.lucene.analysis.pattern.PatternReplaceCharFilterFactory
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (revision 0)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (working copy)
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory
+org.apache.lucene.analysis.ar.ArabicStemFilterFactory
+org.apache.lucene.analysis.bg.BulgarianStemFilterFactory
+org.apache.lucene.analysis.br.BrazilianStemFilterFactory
+org.apache.lucene.analysis.cjk.CJKBigramFilterFactory
+org.apache.lucene.analysis.cjk.CJKWidthFilterFactory
+org.apache.lucene.analysis.cn.ChineseFilterFactory
+org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory
+org.apache.lucene.analysis.commongrams.CommonGramsQueryFilterFactory
+org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilterFactory
+org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilterFactory
+org.apache.lucene.analysis.core.LowerCaseFilterFactory
+org.apache.lucene.analysis.core.StopFilterFactory
+org.apache.lucene.analysis.core.TypeTokenFilterFactory
+org.apache.lucene.analysis.cz.CzechStemFilterFactory
+org.apache.lucene.analysis.de.GermanLightStemFilterFactory
+org.apache.lucene.analysis.de.GermanMinimalStemFilterFactory
+org.apache.lucene.analysis.de.GermanNormalizationFilterFactory
+org.apache.lucene.analysis.de.GermanStemFilterFactory
+org.apache.lucene.analysis.el.GreekLowerCaseFilterFactory
+org.apache.lucene.analysis.el.GreekStemFilterFactory
+org.apache.lucene.analysis.en.EnglishMinimalStemFilterFactory
+org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory
+org.apache.lucene.analysis.en.KStemFilterFactory
+org.apache.lucene.analysis.en.PorterStemFilterFactory
+org.apache.lucene.analysis.es.SpanishLightStemFilterFactory
+org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory
+org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory
+org.apache.lucene.analysis.fr.ElisionFilterFactory
+org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory
+org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory
+org.apache.lucene.analysis.ga.IrishLowerCaseFilterFactory
+org.apache.lucene.analysis.gl.GalicianMinimalStemFilterFactory
+org.apache.lucene.analysis.gl.GalicianStemFilterFactory
+org.apache.lucene.analysis.hi.HindiNormalizationFilterFactory
+org.apache.lucene.analysis.hi.HindiStemFilterFactory
+org.apache.lucene.analysis.hu.HungarianLightStemFilterFactory
+org.apache.lucene.analysis.hunspell.HunspellStemFilterFactory
+org.apache.lucene.analysis.id.IndonesianStemFilterFactory
+org.apache.lucene.analysis.in.IndicNormalizationFilterFactory
+org.apache.lucene.analysis.it.ItalianLightStemFilterFactory
+org.apache.lucene.analysis.lv.LatvianStemFilterFactory
+org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory
+org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
+org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory
+org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory
+org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilterFactory
+org.apache.lucene.analysis.miscellaneous.LengthFilterFactory
+org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory
+org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory
+org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory
+org.apache.lucene.analysis.miscellaneous.TrimFilterFactory
+org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory
+org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory
+org.apache.lucene.analysis.ngram.NGramFilterFactory
+org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory
+org.apache.lucene.analysis.no.NorwegianMinimalStemFilterFactory
+org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory
+org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory
+org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterFactory
+org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory
+org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory
+org.apache.lucene.analysis.position.PositionFilterFactory
+org.apache.lucene.analysis.pt.PortugueseLightStemFilterFactory
+org.apache.lucene.analysis.pt.PortugueseMinimalStemFilterFactory
+org.apache.lucene.analysis.pt.PortugueseStemFilterFactory
+org.apache.lucene.analysis.reverse.ReverseStringFilterFactory
+org.apache.lucene.analysis.ru.RussianLightStemFilterFactory
+org.apache.lucene.analysis.shingle.ShingleFilterFactory
+org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory
+org.apache.lucene.analysis.standard.ClassicFilterFactory
+org.apache.lucene.analysis.standard.StandardFilterFactory
+org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory
+org.apache.lucene.analysis.synonym.SynonymFilterFactory
+org.apache.lucene.analysis.th.ThaiWordFilterFactory
+org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory
+org.apache.lucene.collation.CollationKeyFilterFactory
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (revision 0)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (working copy)
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.analysis.ar.ArabicLetterTokenizerFactory
+org.apache.lucene.analysis.cjk.CJKTokenizerFactory
+org.apache.lucene.analysis.cn.ChineseTokenizerFactory
+org.apache.lucene.analysis.core.KeywordTokenizerFactory
+org.apache.lucene.analysis.core.LetterTokenizerFactory
+org.apache.lucene.analysis.core.LowerCaseTokenizerFactory
+org.apache.lucene.analysis.core.WhitespaceTokenizerFactory
+org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory
+org.apache.lucene.analysis.ngram.NGramTokenizerFactory
+org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory
+org.apache.lucene.analysis.pattern.PatternTokenizerFactory
+org.apache.lucene.analysis.ru.RussianLetterTokenizerFactory
+org.apache.lucene.analysis.standard.ClassicTokenizerFactory
+org.apache.lucene.analysis.standard.StandardTokenizerFactory
+org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory
+org.apache.lucene.analysis.wikipedia.WikipediaTokenizerFactory
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java (revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java (working copy)
@@ -0,0 +1,99 @@
+package org.apache.lucene.analysis.ar;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.fa.PersianCharFilterFactory;
+import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
+
+/**
+ * Simple tests to ensure the Arabic filter Factories are working.
+ */
+public class TestArabicFilters extends BaseTokenStreamTestCase {
+ /**
+ * Test ArabicLetterTokenizerFactory
+ * @deprecated (3.1) Remove in Lucene 5.0
+ */
+ @Deprecated
+ public void testTokenizer() throws Exception {
+ Reader reader = new StringReader("الذين مَلكت أيمانكم");
+ ArabicLetterTokenizerFactory factory = new ArabicLetterTokenizerFactory();
+ factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
+ Map