Index: solr/src/test/org/apache/solr/analysis/TestHungarianLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestHungarianLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestHungarianLightStemFilterFactory.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Hungarian light stem factory is working. 
+ */ +public class TestHungarianLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + /* + Reader reader = new StringReader("administrativement"); + HungarianLightStemFilterFactory factory = new HungarianLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "administratif" }); + */ + System.err.println("add tests"); + fail(); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestHungarianLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestRussianLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestRussianLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestRussianLightStemFilterFactory.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Russian light stem factory is working. + */ +public class TestRussianLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + /* + Reader reader = new StringReader("administrativement"); + RussianLightStemFilterFactory factory = new RussianLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "administratif" }); + */ + System.err.println("add tests"); + fail(); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestRussianLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestPortugueseMinimalStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestPortugueseMinimalStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestPortugueseMinimalStemFilterFactory.java (revision 0) @@ -0,0 +1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Portuguese Minimal stem factory is working: the filter it creates over a whitespace-tokenized reader must turn "questões" into "questão". + */ +public class TestPortugueseMinimalStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + Reader reader = new StringReader("questões"); + PortugueseMinimalStemFilterFactory factory = new PortugueseMinimalStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "questão" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestPortugueseMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestFrenchLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestFrenchLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestFrenchLightStemFilterFactory.java (revision 0) @@ -0,0 +1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the French light stem factory is working: the filter it creates over a whitespace-tokenized reader must turn "administrativement" into "administratif". + */ +public class TestFrenchLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + Reader reader = new StringReader("administrativement"); + FrenchLightStemFilterFactory factory = new FrenchLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "administratif" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestFrenchLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestGermanLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestGermanLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestGermanLightStemFilterFactory.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the German light stem factory is working. + */ +public class TestGermanLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + /* "häuser" (plural of "Haus"): the light stemmer is expected to fold the + * umlaut and drop the plural suffix, yielding "haus". */ + Reader reader = new StringReader("häuser"); + GermanLightStemFilterFactory factory = new GermanLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "haus" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestGermanLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestItalianLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestItalianLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestItalianLightStemFilterFactory.java (revision 0) @@ -0,0 +1,40 @@ +package 
org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Italian light stem factory is working. 
+ */ +public class TestItalianLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + /* + Reader reader = new StringReader("administrativement"); + ItalianLightStemFilterFactory factory = new ItalianLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "administratif" }); + */ + System.err.println("add tests"); + fail(); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestItalianLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestFrenchMinimalStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestFrenchMinimalStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestFrenchMinimalStemFilterFactory.java (revision 0) @@ -0,0 +1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the French minimal stem factory is working: the filter it creates over a whitespace-tokenized reader must turn "chevaux" into "cheval". + */ +public class TestFrenchMinimalStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + Reader reader = new StringReader("chevaux"); + FrenchMinimalStemFilterFactory factory = new FrenchMinimalStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "cheval" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestFrenchMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestGermanMinimalStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestGermanMinimalStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestGermanMinimalStemFilterFactory.java (revision 0) @@ -0,0 +1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the German minimal stem factory is working: the filter it creates over a whitespace-tokenized reader must turn "bilder" into "bild". + */ +public class TestGermanMinimalStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + Reader reader = new StringReader("bilder"); + GermanMinimalStemFilterFactory factory = new GermanMinimalStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "bild" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestGermanMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestPortugueseLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestPortugueseLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestPortugueseLightStemFilterFactory.java (revision 0) @@ -0,0 +1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Portuguese Light stem factory is working: the filter it creates over a whitespace-tokenized reader must turn "evidentemente" into "evident". + */ +public class TestPortugueseLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + Reader reader = new StringReader("evidentemente"); + PortugueseLightStemFilterFactory factory = new PortugueseLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "evident" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestPortugueseLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestSpanishLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestSpanishLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestSpanishLightStemFilterFactory.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Spanish Light stem factory is working. + */ +public class TestSpanishLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + /* + Reader reader = new StringReader("evidentemente"); + SpanishLightStemFilterFactory factory = new SpanishLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "evident" }); + */ + System.err.println("add tests"); + fail(); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestSpanishLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestSwedishLightStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestSwedishLightStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestSwedishLightStemFilterFactory.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Swedish Light stem factory is working. + */ +public class TestSwedishLightStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + /* + * "äpplen" and "äpple" (plural and singular of "apple") are expected to + * conflate to the same stem, "äppl", once their endings are removed. + */ + Reader reader = new StringReader("äpplen äpple"); + SwedishLightStemFilterFactory factory = new SwedishLightStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "äppl", "äppl" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestSwedishLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/test/org/apache/solr/analysis/TestEnglishMinimalStemFilterFactory.java =================================================================== --- solr/src/test/org/apache/solr/analysis/TestEnglishMinimalStemFilterFactory.java (revision 0) +++ solr/src/test/org/apache/solr/analysis/TestEnglishMinimalStemFilterFactory.java (revision 0) @@ -0,0 
+1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the English minimal stem factory is working. 
+ */ +public class TestEnglishMinimalStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { /* the filter created by the factory over a whitespace-tokenized reader must turn "bricks" into "brick" */ + Reader reader = new StringReader("bricks"); + EnglishMinimalStemFilterFactory factory = new EnglishMinimalStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "brick" }); + } +} Property changes on: solr\src\test\org\apache\solr\analysis\TestEnglishMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter; + +/** Factory for {@link PortugueseMinimalStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class PortugueseMinimalStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new PortugueseMinimalStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\PortugueseMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.fr.FrenchLightStemFilter; + +/** Factory for {@link FrenchLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new FrenchLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\FrenchLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.de.GermanLightStemFilter; + +/** Factory for {@link GermanLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class GermanLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new GermanLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\GermanLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.it.ItalianLightStemFilter; + +/** Factory for {@link ItalianLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class ItalianLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new ItalianLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\ItalianLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter; + +/** Factory for {@link FrenchMinimalStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new FrenchMinimalStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\FrenchMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.de.GermanMinimalStemFilter; + +/** Factory for {@link GermanMinimalStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new GermanMinimalStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\GermanMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.pt.PortugueseLightStemFilter; + +/** Factory for {@link PortugueseLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class PortugueseLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new PortugueseLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\PortugueseLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.es.SpanishLightStemFilter; + +/** Factory for {@link SpanishLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new SpanishLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\SpanishLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.sv.SwedishLightStemFilter; + +/** Factory for {@link SwedishLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new SwedishLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\SwedishLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; + +/** Factory for {@link EnglishMinimalStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new EnglishMinimalStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\EnglishMinimalStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.hu.HungarianLightStemFilter; + +/** Factory for {@link HungarianLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new HungarianLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\HungarianLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java (revision 0) +++ solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ru.RussianLightStemFilter; + +/** Factory for {@link RussianLightStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new filter instance. */ +public class RussianLightStemFilterFactory extends BaseTokenFilterFactory { + public TokenStream create(TokenStream input) { + return new RussianLightStemFilter(input); + } +} Property changes on: solr\src\java\org\apache\solr\analysis\RussianLightStemFilterFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (revision 0) @@ -0,0 +1,53 @@ +package org.apache.lucene.analysis.de; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link GermanMinimalStemFilter}, applied to the output of a {@link WhitespaceTokenizer}. + */ +public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new GermanMinimalStemFilter(source)); + } + }; + + /** Checks the stem of some single-term examples from the paper (e.g. "frauen" stems to "frau"). */ + public void testExamples() throws IOException { + checkOneTerm(analyzer, "sängerinnen", "sangerin"); + checkOneTerm(analyzer, "frauen", "frau"); + checkOneTerm(analyzer, "kenntnisse", "kenntnis"); + checkOneTerm(analyzer, "staates", "staat"); + checkOneTerm(analyzer, "bilder", "bild"); + checkOneTerm(analyzer, "boote", "boot"); + checkOneTerm(analyzer, "götter", "gott"); + checkOneTerm(analyzer, "äpfel", "apfel"); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\de\TestGermanMinimalStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.analysis.de; + +/** + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link GermanLightStemFilter}, applied to the output of a {@link WhitespaceTokenizer}. + */ +public class TestGermanLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new GermanLightStemFilter(source)); + } + }; + + /** Placeholder, no examples from the paper are checked yet: prints "add tests" to stderr and then fails unconditionally. */ + public void testExamples() throws IOException { + System.err.println("add tests"); + fail(); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\de\TestGermanLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: 
modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.analysis.sv; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link SwedishLightStemFilter}, applied to the output of a {@link WhitespaceTokenizer}. + */ +public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new SwedishLightStemFilter(source)); + } + }; + + /** Placeholder, no examples from the paper are checked yet: prints "add tests" to stderr and then fails unconditionally. */ + public void testExamples() throws IOException { + System.err.println("add tests"); + fail(); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\sv\TestSwedishLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (revision 0) @@ -0,0 +1,55 @@ +package org.apache.lucene.analysis.fr; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link FrenchMinimalStemFilter}, applied to the output of a {@link WhitespaceTokenizer}. + */ +public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source)); + } + }; + + /** Checks the stem of some single-term examples from the paper (e.g. "chevaux" stems to "cheval"). */ + public void testExamples() throws IOException { + checkOneTerm(analyzer, "chevaux", "cheval"); + checkOneTerm(analyzer, "hiboux", "hibou"); + + checkOneTerm(analyzer, "chantés", "chant"); + checkOneTerm(analyzer, "chanter", "chant"); + checkOneTerm(analyzer, "chante", "chant"); + + checkOneTerm(analyzer, "baronnes", "baron"); + checkOneTerm(analyzer, "barons", "baron"); + checkOneTerm(analyzer, "baron", "baron"); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\fr\TestFrenchMinimalStemFilter.java 
___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (revision 0) @@ -0,0 +1,155 @@ +package org.apache.lucene.analysis.fr; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link FrenchLightStemFilter}, applied to the output of a {@link WhitespaceTokenizer}. + */ +public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new FrenchLightStemFilter(source)); + } + }; + + /** Checks the stem of some single-term examples from the paper; related forms are listed together and expected to share one stem. */ + public void testExamples() throws IOException { + checkOneTerm(analyzer, "chevaux", "cheval"); + checkOneTerm(analyzer, "cheval", "cheval"); + + checkOneTerm(analyzer, "hiboux", "hibou"); + checkOneTerm(analyzer, "hibou", "hibou"); + + checkOneTerm(analyzer, "chantés", "chant"); + checkOneTerm(analyzer, "chanter", "chant"); + checkOneTerm(analyzer, "chante", "chant"); + checkOneTerm(analyzer, "chant", "chant"); + + checkOneTerm(analyzer, "baronnes", "baron"); + checkOneTerm(analyzer, "barons", "baron"); + checkOneTerm(analyzer, "baron", "baron"); + + checkOneTerm(analyzer, "peaux", "peau"); + checkOneTerm(analyzer, "peau", "peau"); + + checkOneTerm(analyzer, "anneaux", "aneau"); + checkOneTerm(analyzer, "anneau", "aneau"); + + checkOneTerm(analyzer, "neveux", "neveu"); + checkOneTerm(analyzer, "neveu", "neveu"); + + checkOneTerm(analyzer, "affreux", "afreu"); + checkOneTerm(analyzer, "affreuse", "afreu"); + + checkOneTerm(analyzer, "investissement", "investi"); + checkOneTerm(analyzer, "investir", "investi"); + + checkOneTerm(analyzer, "assourdissant", "asourdi"); + checkOneTerm(analyzer, "assourdir", "asourdi"); + + 
checkOneTerm(analyzer, "pratiquement", "pratiqu"); + checkOneTerm(analyzer, "pratique", "pratiqu"); + + checkOneTerm(analyzer, "administrativement", "administratif"); + checkOneTerm(analyzer, "administratif", "administratif"); + + checkOneTerm(analyzer, "justificatrice", "justifi"); + checkOneTerm(analyzer, "justificateur", "justifi"); + checkOneTerm(analyzer, "justifier", "justifi"); + + checkOneTerm(analyzer, "educatrice", "eduqu"); + checkOneTerm(analyzer, "eduquer", "eduqu"); + + checkOneTerm(analyzer, "communicateur", "comuniqu"); + checkOneTerm(analyzer, "communiquer", "comuniqu"); + + checkOneTerm(analyzer, "accompagnatrice", "acompagn"); + checkOneTerm(analyzer, "accompagnateur", "acompagn"); + + checkOneTerm(analyzer, "administrateur", "administr"); + checkOneTerm(analyzer, "administrer", "administr"); + + checkOneTerm(analyzer, "productrice", "product"); + checkOneTerm(analyzer, "producteur", "product"); + + checkOneTerm(analyzer, "acheteuse", "achet"); + checkOneTerm(analyzer, "acheteur", "achet"); + + checkOneTerm(analyzer, "planteur", "plant"); + checkOneTerm(analyzer, "plante", "plant"); + + checkOneTerm(analyzer, "poreuse", "poreu"); + checkOneTerm(analyzer, "poreux", "poreu"); + + checkOneTerm(analyzer, "plieuse", "plieu"); + + checkOneTerm(analyzer, "bijoutière", "bijouti"); + checkOneTerm(analyzer, "bijoutier", "bijouti"); + + checkOneTerm(analyzer, "caissière", "caisi"); + checkOneTerm(analyzer, "caissier", "caisi"); + + checkOneTerm(analyzer, "abrasive", "abrasif"); + checkOneTerm(analyzer, "abrasif", "abrasif"); + + checkOneTerm(analyzer, "folle", "fou"); + checkOneTerm(analyzer, "fou", "fou"); + + checkOneTerm(analyzer, "personnelle", "person"); + checkOneTerm(analyzer, "personne", "person"); + + // algo bug: too short length + //checkOneTerm(analyzer, "personnel", "person"); + + checkOneTerm(analyzer, "complète", "complet"); + checkOneTerm(analyzer, "complet", "complet"); + + checkOneTerm(analyzer, "aromatique", "aromat"); + + 
checkOneTerm(analyzer, "faiblesse", "faibl"); + checkOneTerm(analyzer, "faible", "faibl"); + + checkOneTerm(analyzer, "patinage", "patin"); + checkOneTerm(analyzer, "patin", "patin"); + + checkOneTerm(analyzer, "sonorisation", "sono"); + + checkOneTerm(analyzer, "ritualisation", "rituel"); + checkOneTerm(analyzer, "rituel", "rituel"); + + // algo bug: masked by rules above + //checkOneTerm(analyzer, "colonisateur", "colon"); + + checkOneTerm(analyzer, "nomination", "nomin"); + + checkOneTerm(analyzer, "disposition", "dispos"); + checkOneTerm(analyzer, "dispose", "dispos"); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\fr\TestFrenchLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.analysis.hu; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link HungarianLightStemFilter}, applied to the output of a {@link WhitespaceTokenizer}. + */ +public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new HungarianLightStemFilter(source)); + } + }; + + /** Placeholder, no examples from the paper are checked yet: prints "add tests" to stderr and then fails unconditionally. */ + public void testExamples() throws IOException { + System.err.println("add tests"); + fail(); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\hu\TestHungarianLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (revision 0) @@ -0,0 +1,62 @@ +package org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link PortugueseMinimalStemFilter} + */ +public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source); + return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(result)); + } + }; + + /** + * Test the example from the paper "Assessing the impact of stemming accuracy + * on information retrieval" + */ + public void testExamples() throws IOException { + assertAnalyzesTo( + analyzer, + "O debate político, pelo menos o que vem a público, parece, de modo nada " + + "surpreendente, restrito a temas menores. 
Mas há, evidentemente, " + + "grandes questões em jogo nas eleições que se aproximam.", + new String[] { + "o", "debate", "político", "pelo", "menos", "o", "que", "vem", "a", + "público", "parece", "de", "modo", "nada", "surpreendente", "restrito", + "a", "tema", "menor", "mas", "há", "evidentemente", "grande", "questão", + "em", "jogo", "na", "eleição", "que", "se", "aproximam" + }); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\pt\TestPortugueseMinimalStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (revision 0) @@ -0,0 +1,88 @@ +package org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link PortugueseLightStemFilter} + */ +public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source); + return new TokenStreamComponents(source, new PortugueseLightStemFilter(result)); + } + }; + + /** + * Test the example from the paper "Assessing the impact of stemming accuracy + * on information retrieval" + */ + public void testExamples() throws IOException { + assertAnalyzesTo( + analyzer, + "O debate político, pelo menos o que vem a público, parece, de modo nada " + + "surpreendente, restrito a temas menores. 
Mas há, evidentemente, " + + "grandes questões em jogo nas eleições que se aproximam.", + new String[] { + "o", "debat", "politic", "pelo", "meno", "o", "que", "vem", "a", + "public", "parec", "de", "modo", "nada", "surpreendent", "restrit", + "a", "tema", "menor", "mas", "há", "evident", "grand", "questa", + "em", "jogo", "nas", "eleica", "que", "se", "aproximam" + }); + } + + /** + * Test examples from the c implementation + */ + public void testMoreExamples() throws IOException { + checkOneTerm(analyzer, "doutores", "doutor"); + checkOneTerm(analyzer, "doutor", "doutor"); + + checkOneTerm(analyzer, "homens", "homem"); + checkOneTerm(analyzer, "homem", "homem"); + + checkOneTerm(analyzer, "papéis", "papel"); + checkOneTerm(analyzer, "papel", "papel"); + + checkOneTerm(analyzer, "normais", "normal"); + checkOneTerm(analyzer, "normal", "normal"); + + checkOneTerm(analyzer, "lencóis", "lencol"); + checkOneTerm(analyzer, "lencol", "lencol"); + + checkOneTerm(analyzer, "barris", "barril"); + checkOneTerm(analyzer, "barril", "barril"); + + checkOneTerm(analyzer, "botões", "bota"); + checkOneTerm(analyzer, "botão", "bota"); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\pt\TestPortugueseLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.analysis.ru; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link RussianLightStemFilter} + */ +public class TestRussianLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new RussianLightStemFilter(source)); + } + }; + + /** Test some examples from the paper */ + public void testExamples() throws IOException { + System.err.println(""); + fail(); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\ru\TestRussianLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java 
=================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (revision 0) @@ -0,0 +1,54 @@ +package org.apache.lucene.analysis.en; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link EnglishMinimalStemFilter} + */ +public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source)); + } + }; + + /** Test some examples from various papers about this technique */ + public void testExamples() throws IOException { + checkOneTerm(analyzer, "queries", "query"); + checkOneTerm(analyzer, "phrases", "phrase"); + checkOneTerm(analyzer, "corpus", "corpus"); + checkOneTerm(analyzer, "stress", "stress"); + checkOneTerm(analyzer, "kings", "king"); + checkOneTerm(analyzer, "panels", "panel"); + checkOneTerm(analyzer, "aerodynamics", "aerodynamic"); + checkOneTerm(analyzer, "congress", "congress"); + checkOneTerm(analyzer, "serious", "serious"); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\en\TestEnglishMinimalStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (revision 0) @@ -0,0 +1,47 @@ +package 
org.apache.lucene.analysis.es; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link SpanishLightStemFilter} + */ +public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new SpanishLightStemFilter(source)); + } + }; + + /** Test some examples from the paper */ + public void testExamples() throws IOException { + System.err.println("add tests"); + fail(); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\es\TestSpanishLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + 
native Index: modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.analysis.it; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link ItalianLightStemFilter} + */ +public class TestItalianLightStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(source, new ItalianLightStemFilter(source)); + } + }; + + /** Test some examples from the paper */ + public void testExamples() throws IOException { + System.err.println("add tests"); + fail(); + } +} Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\it\TestItalianLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.de; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link GermanMinimalStemmer} to stem German + * words. + *
+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class GermanMinimalStemFilter extends TokenFilter { + private final GermanMinimalStemmer stemmer = new GermanMinimalStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public GermanMinimalStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\de\GermanMinimalStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.de; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link GermanLightStemmer} to stem German + * words. + *+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class GermanLightStemFilter extends TokenFilter { + private final GermanLightStemmer stemmer = new GermanLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public GermanLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\de\GermanLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java (revision 0) @@ -0,0 +1,95 @@ +package org.apache.lucene.analysis.de; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// + * POSSIBILITY OF SUCH DAMAGE.
// + */

/**
 * Minimal Stemmer for German.
 * <p>
 * This stemmer implements the following algorithm:
 * Morphologie et recherche d'information
 * Jacques Savoy.
 */
public class GermanMinimalStemmer {

  /**
   * Stems a German term in place.
   * <p>
   * Terms shorter than five characters are returned untouched. Otherwise the
   * umlauts among the first {@code len} characters are replaced by their base
   * vowel and at most one plural-style suffix is cut off.
   *
   * @param s   buffer holding the term; its first {@code len} characters may be modified
   * @param len number of valid characters in {@code s}
   * @return the length of the stem, which is never greater than {@code len}
   */
  public int stem(char s[], int len) {
    if (len < 5)
      return len;

    // Char literals are written as Unicode escapes so the compiled values do
    // not depend on the encoding the source file is read with.
    for (int i = 0; i < len; i++)
      switch(s[i]) {
        case '\u00e4': s[i] = 'a'; break; // a-umlaut
        case '\u00f6': s[i] = 'o'; break; // o-umlaut
        case '\u00fc': s[i] = 'u'; break; // u-umlaut
      }

    if (len > 6 && s[len-3] == 'n' && s[len-2] == 'e' && s[len-1] == 'n')
      return len - 3;

    // Two-letter suffixes: -en, -se, -es, -er. Every case must end in a break:
    // without it a failed test falls through into the next case and is matched
    // against the wrong letter (e.g. "kaffee" lost two characters via the 's' case).
    if (len > 5)
      switch(s[len-1]) {
        case 'n': if (s[len-2] == 'e') return len - 2; else break;
        case 'e': if (s[len-2] == 's') return len - 2; else break;
        case 's': if (s[len-2] == 'e') return len - 2; else break;
        case 'r': if (s[len-2] == 'e') return len - 2; else break;
      }

    // Single-letter suffixes.
    switch(s[len-1]) {
      case 'n':
      case 'e':
      case 's':
      case 'r': return len - 1;
    }

    return len;
  }
}

// Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\de\GermanMinimalStemmer.java
// ___________________________________________________________________
// Added: svn:eol-style + native
// Index: modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
// ===================================================================
// --- modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java (revision 0)
// +++ modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java (revision 0)
// @@ -0,0
+1,134 @@ +package org.apache.lucene.analysis.de; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
// + *
// + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// + * POSSIBILITY OF SUCH DAMAGE.
// + */

/**
 * Light Stemmer for German.
 */
public class GermanLightStemmer {

  /**
   * Stems a German term in place.
   * <p>
   * Accented vowels among the first {@code len} characters are replaced by
   * their base vowel, then two suffix-removal passes are applied in sequence.
   *
   * @param s   buffer holding the term; its first {@code len} characters may be modified
   * @param len number of valid characters in {@code s}
   * @return the length of the stem, which is never greater than {@code len}
   */
  public int stem(char s[], int len) {
    // Char literals are written as Unicode escapes so the compiled values do
    // not depend on the encoding the source file is read with.
    for (int i = 0; i < len; i++)
      switch(s[i]) {
        case '\u00e4': // a with diaeresis
        case '\u00e0': // a with grave
        case '\u00e1': // a with acute
        case '\u00e2': s[i] = 'a'; break; // a with circumflex
        case '\u00f6': // o with diaeresis
        case '\u00f2': // o with grave
        case '\u00f3': // o with acute
        case '\u00f4': s[i] = 'o'; break; // o with circumflex
        case '\u00ef': // i with diaeresis
        case '\u00ec': // i with grave
        case '\u00ed': // i with acute
        case '\u00ee': s[i] = 'i'; break; // i with circumflex
        case '\u00fc': // u with diaeresis
        case '\u00f9': // u with grave
        case '\u00fa': // u with acute
        case '\u00fb': s[i] = 'u'; break; // u with circumflex
      }

    len = step1(s, len);
    return step2(s, len);
  }

  /** Returns true if {@code ch} is one of the consonants after which a trailing -s / -st is removed. */
  private boolean stEnding(char ch) {
    switch(ch) {
      case 'b':
      case 'd':
      case 'f':
      case 'g':
      case 'h':
      case 'k':
      case 'l':
      case 'm':
      case 'n':
      case 't': return true;
      default: return false;
    }
  }

  /** First pass: removes -ern, -em/-en/-er/-es, -e, or an -s that follows an {@link #stEnding} consonant. */
  private int step1(char s[], int len) {
    if (len > 5 && s[len-3] == 'e' && s[len-2] == 'r' && s[len-1] == 'n')
      return len - 3;

    if (len > 4 && s[len-2] == 'e')
      switch(s[len-1]) {
        case 'm':
        case 'n':
        case 'r':
        case 's': return len - 2;
      }

    if (len > 3 && s[len-1] == 'e')
      return len - 1;

    if (len > 3 && s[len-1] == 's' && stEnding(s[len-2]))
      return len - 1;

    return len;
  }

  /** Second pass: removes -est, -er/-en, or an -st that follows an {@link #stEnding} consonant. */
  private int step2(char s[], int len) {
    // The final letter is at len-1. Testing s[len-3] against both 'e' and 't'
    // could never succeed, which left the -est rule unreachable.
    if (len > 5 && s[len-3] == 'e' && s[len-2] == 's' && s[len-1] == 't')
      return len - 3;

    if (len > 4 && s[len-2] == 'e' && (s[len-1] == 'r' || s[len-1] == 'n'))
      return len - 2;

    if (len > 4 && s[len-2] == 's' && s[len-1] == 't' && stEnding(s[len-3]))
      return len - 2;

    return len;
  }
}

// Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\de\GermanLightStemmer.java
// ___________________________________________________________________
// Added: svn:eol-style + native
// Index: modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
// ===================================================================
// --- modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java (revision 0)
// +++ modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java (revision 0)
// @@ -0,0 +1,124 @@
// +package org.apache.lucene.analysis.sv;
// +
// +/**
// + * Licensed to the Apache Software Foundation (ASF) under one or more
// + * contributor license agreements. See the NOTICE file distributed with
// + * this work for additional information regarding copyright ownership.
// + * The ASF licenses this file to You under the Apache License, Version 2.0
// + * (the "License"); you may not use this file except in compliance with
// + * the License. You may obtain a copy of the License at
// + *
// + * http://www.apache.org/licenses/LICENSE-2.0
// + *
// + * Unless required by applicable law or agreed to in writing, software
// + * distributed under the License is distributed on an "AS IS" BASIS,
// + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// + * See the License for the specific language governing permissions and
// + * limitations under the License.
// + */
// +
// +/*
// + * This algorithm is updated based on code located at:
// + * http://members.unine.ch/jacques.savoy/clef/
// + *
// + * Full copyright for that code follows:
// + */
// +
// +/*
// + * Copyright (c) 2005, Jacques Savoy
// + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Light Stemmer for Swedish. 
/**
 * Light Stemmer for Swedish.
 * <p>
 * Operates in place on a character buffer and returns the new term length.
 * Suffix groups are tried from longest to shortest; the first group that
 * matches (and whose minimum-length guard holds) decides the result.
 */
public class SwedishLightStemmer {

  /** Five-letter suffixes, stripped when more than 7 characters remain. */
  private static final String[] SUFFIXES_5 = { "elser", "heten" };

  /** Four-letter suffixes, stripped when more than 6 characters remain. */
  private static final String[] SUFFIXES_4 = {
    "arne", "erna", "ande", "else", "aste", "orna", "aren"
  };

  /** Three-letter suffixes, stripped when more than 5 characters remain. */
  private static final String[] SUFFIXES_3 = { "are", "ast", "het" };

  /** Two-letter suffixes, stripped when more than 4 characters remain. */
  private static final String[] SUFFIXES_2 = { "ar", "er", "or", "en", "at", "te", "et" };

  /**
   * Stems the first {@code len} characters of {@code s}.
   *
   * @param s   buffer holding the term; the three accented Swedish vowels are folded in place
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed term
   */
  public int stem(char s[], int len) {
    for (int pos = 0; pos < len; pos++) {
      final char c = s[pos];
      if (c == '\u00e4' || c == '\u00e5') {        // a with diaeresis, a with ring
        s[pos] = 'a';
      } else if (c == '\u00f6') {                  // o with diaeresis
        s[pos] = 'o';
      }
    }

    int end = len;

    // A trailing "s" is dropped first; the suffix rules then see the shortened term.
    if (end > 4 && s[end - 1] == 's') {
      end--;
    }

    if (end > 7 && endsWithAny(s, end, SUFFIXES_5)) {
      return end - 5;
    }
    if (end > 6 && endsWithAny(s, end, SUFFIXES_4)) {
      return end - 4;
    }
    if (end > 5 && endsWithAny(s, end, SUFFIXES_3)) {
      return end - 3;
    }
    if (end > 4 && endsWithAny(s, end, SUFFIXES_2)) {
      return end - 2;
    }

    if (end > 3) {
      final char last = s[end - 1];
      if (last == 't' || last == 'a' || last == 'e' || last == 'n') {
        return end - 1;
      }
    }

    return end;
  }

  /** Returns true if the first {@code len} characters of {@code s} end with any of {@code suffixes}. */
  private boolean endsWithAny(char s[], int len, String[] suffixes) {
    for (String candidate : suffixes) {
      if (endsWith(s, len, candidate)) {
        return true;
      }
    }
    return false;
  }

  /** Returns true if the first {@code len} characters of {@code s} end with {@code suffix}. */
  private boolean endsWith(char s[], int len, String suffix) {
    final int offset = len - suffix.length();
    if (offset < 0) {
      return false;
    }
    for (int k = 0; k < suffix.length(); k++) {
      if (s[offset + k] != suffix.charAt(k)) {
        return false;
      }
    }
    return true;
  }
}
org.apache.lucene.analysis.sv; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link SwedishLightStemmer} to stem Swedish + * words. + *
+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class SwedishLightStemFilter extends TokenFilter { + private final SwedishLightStemmer stemmer = new SwedishLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public SwedishLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\sv\SwedishLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java (revision 0) @@ -0,0 +1,97 @@ +package org.apache.lucene.analysis.fr; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/**
 * Minimal (plural-removal) Stemmer for French.
 * <p>
 * This stemmer implements the following algorithm:
 * <i>A Stemming procedure and stopword list for general French corpora.</i>
 * Jacques Savoy.
 * <p>
 * Rules, as implemented here:
 * <ul>
 *   <li>terms shorter than six characters are returned unchanged
 *       (NOTE: the published description says "five or more letters"; this
 *       code has always required six, and that threshold is kept);</li>
 *   <li>if the term ends in "x": a final "aux" becomes "al"
 *       (chevaux -&gt; cheval), otherwise the "x" is removed
 *       (hiboux -&gt; hibou);</li>
 *   <li>otherwise a final "s", then "r", then "e", then e-acute are removed
 *       in that order, each at most once (chanter -&gt; chant);</li>
 *   <li>finally, if the last two letters are identical, the last one is
 *       removed (baronn -&gt; baron).</li>
 * </ul>
 */
public class FrenchMinimalStemmer {

  /**
   * Stems the first {@code len} characters of {@code s} in place.
   *
   * @param s   buffer holding the term; only the "aux" -&gt; "al" rule rewrites a character
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed term
   */
  public int stem(char[] s, int len) {
    if (len < 6) {
      return len;
    }

    if (s[len - 1] == 'x') {
      if (s[len - 3] == 'a' && s[len - 2] == 'u') {
        s[len - 2] = 'l';
      }
      return len - 1;
    }

    if (s[len - 1] == 's') len--;
    if (s[len - 1] == 'r') len--;
    if (s[len - 1] == 'e') len--;
    if (s[len - 1] == '\u00e9') len--; // e with acute

    // Only collapse a doubled final *letter*. Without the letter check a
    // numeric token such as "1234555" would lose a digit and change value.
    if (s[len - 1] == s[len - 2] && Character.isLetter(s[len - 1])) {
      len--;
    }
    return len;
  }
}
___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java (revision 0) @@ -0,0 +1,280 @@ +package org.apache.lucene.analysis.fr; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Light Stemmer for French. 
/**
 * Light Stemmer for French.
 * <p>
 * Works in place on a character buffer. {@link #stem(char[], int)} first
 * strips plural markers, then tries a fixed sequence of derivational suffix
 * rules. Most rules rewrite the tail of the term and finish through
 * {@code norm}; a few ("trice", e-grave + "te", "ique") only rewrite the term
 * and let the remaining rules run. The order of the rules is significant.
 */
public class FrenchLightStemmer {

  /**
   * Stems the first {@code len} characters of {@code s}.
   *
   * @param s   buffer holding the term; rewritten in place
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed term
   */
  public int stem(char[] s, int len) {
    // Plural handling: "-aux" -> "-al" (unless preceded by 'e'), then drop x / s.
    if (len > 5 && s[len - 1] == 'x') {
      if (s[len - 3] == 'a' && s[len - 2] == 'u' && s[len - 4] != 'e') {
        s[len - 2] = 'l';
      }
      len--;
    }

    if (len > 3 && s[len - 1] == 'x') {
      len--;
    }

    if (len > 3 && s[len - 1] == 's') {
      len--;
    }

    if (len > 9 && endsWith(s, len, "issement")) {
      len -= 6;
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 8 && endsWith(s, len, "issant")) {
      len -= 4;
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 6 && endsWith(s, len, "ement")) {
      len -= 4;
      if (len > 3 && endsWith(s, len, "ive")) {
        len--;
        s[len - 1] = 'f';
      }
      return norm(s, len);
    }

    if (len > 11 && endsWith(s, len, "ficatrice")) {
      len -= 5;
      s[len - 2] = 'e';
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 10 && endsWith(s, len, "ficateur")) {
      len -= 4;
      s[len - 2] = 'e';
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 9 && endsWith(s, len, "catrice")) {
      len -= 3;
      s[len - 4] = 'q';
      s[len - 3] = 'u';
      s[len - 2] = 'e';
      // s[len-1] is already 'r' (the 'r' of "catrice"), so no write is needed.
      return norm(s, len);
    }

    if (len > 8 && endsWith(s, len, "cateur")) {
      len -= 2;
      s[len - 4] = 'q';
      s[len - 3] = 'u';
      s[len - 2] = 'e';
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 8 && endsWith(s, len, "atrice")) {
      len -= 4;
      s[len - 2] = 'e';
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 7 && endsWith(s, len, "ateur")) {
      len -= 3;
      s[len - 2] = 'e';
      s[len - 1] = 'r';
      return norm(s, len);
    }

    // "trice" -> "teur"; no early return, so the "teur" rule below still applies.
    if (len > 6 && endsWith(s, len, "trice")) {
      len--;
      s[len - 3] = 'e';
      s[len - 2] = 'u';
      s[len - 1] = 'r';
    }

    if (len > 5 && endsWith(s, len, "i\u00e8me")) { // "i" + e-grave + "me"
      return norm(s, len - 4);
    }

    if (len > 7 && endsWith(s, len, "teuse")) {
      len -= 2;
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 6 && endsWith(s, len, "teur")) {
      len--;
      s[len - 1] = 'r';
      return norm(s, len);
    }

    if (len > 5 && endsWith(s, len, "euse")) {
      return norm(s, len - 2);
    }

    if (len > 8 && endsWith(s, len, "\u00e8re")) { // e-grave + "re"
      len--;
      s[len - 2] = 'e';
      return norm(s, len);
    }

    if (len > 7 && endsWith(s, len, "ive")) {
      len--;
      s[len - 1] = 'f';
      return norm(s, len);
    }

    if (len > 4 && (endsWith(s, len, "folle") || endsWith(s, len, "molle"))) {
      len -= 2;
      s[len - 1] = 'u';
      return norm(s, len);
    }

    if (len > 9 && endsWith(s, len, "nnelle")) {
      return norm(s, len - 5);
    }

    if (len > 9 && endsWith(s, len, "nnel")) {
      return norm(s, len - 3);
    }

    // e-grave + "te" -> "et"; falls through to the remaining rules.
    if (len > 4 && endsWith(s, len, "\u00e8te")) {
      len--;
      s[len - 2] = 'e';
    }

    // "ique" is dropped and the remaining rules are tried on the shorter term.
    if (len > 8 && endsWith(s, len, "ique")) {
      len -= 4;
    }

    if (len > 8 && endsWith(s, len, "esse")) {
      return norm(s, len - 3);
    }

    if (len > 7 && endsWith(s, len, "inage")) {
      return norm(s, len - 3);
    }

    if (len > 9 && endsWith(s, len, "isation")) {
      len -= 7;
      if (len > 5 && endsWith(s, len, "ual")) {
        s[len - 2] = 'e';
      }
      return norm(s, len);
    }

    if (len > 9 && endsWith(s, len, "isateur")) {
      return norm(s, len - 7);
    }

    if (len > 8 && endsWith(s, len, "ation")) {
      return norm(s, len - 5);
    }

    if (len > 8 && endsWith(s, len, "ition")) {
      return norm(s, len - 5);
    }

    return norm(s, len);
  }

  /**
   * Final normalization: folds accents, collapses runs of a repeated letter,
   * and strips a trailing "ie", "r", "e" and doubled final letter.
   * <p>
   * Repeated-character handling applies to letters only, so numeric tokens
   * (for example "1234555") are not shortened into a different number.
   */
  private int norm(char[] s, int len) {
    if (len > 4) {
      for (int i = 0; i < len; i++) {
        switch (s[i]) {
          case '\u00e0': // a with grave
          case '\u00e1': // a with acute
          case '\u00e2': // a with circumflex
            s[i] = 'a';
            break;
          case '\u00f4': // o with circumflex
            s[i] = 'o';
            break;
          case '\u00e8': // e with grave
          case '\u00e9': // e with acute
          case '\u00ea': // e with circumflex
            s[i] = 'e';
            break;
          case '\u00f9': // u with grave
          case '\u00fb': // u with circumflex
            s[i] = 'u';
            break;
          case '\u00ee': // i with circumflex
            s[i] = 'i';
            break;
          case '\u00e7': // c with cedilla
            s[i] = 'c';
            break;
          default:
            break;
        }
      }

      char ch = s[0];
      for (int i = 1; i < len; i++) {
        if (s[i] == ch && Character.isLetter(ch)) {
          // Remove the duplicate and re-examine the character that slid into position i.
          len = delete(s, i--, len);
        } else {
          ch = s[i];
        }
      }
    }

    if (len > 4 && endsWith(s, len, "ie")) {
      len -= 2;
    }

    if (len > 4) {
      if (s[len - 1] == 'r') len--;
      if (s[len - 1] == 'e') len--;
      if (s[len - 1] == 'e') len--;
      if (s[len - 1] == s[len - 2] && Character.isLetter(s[len - 1])) len--;
    }
    return len;
  }

  /** Returns true if the first {@code len} characters of {@code s} end with {@code suffix}. */
  private boolean endsWith(char[] s, int len, String suffix) {
    int suffixLen = suffix.length();
    if (suffixLen > len) {
      return false;
    }

    for (int i = suffixLen - 1; i >= 0; i--) {
      if (s[len - (suffixLen - i)] != suffix.charAt(i)) {
        return false;
      }
    }

    return true;
  }

  /** Removes the character at {@code pos} by shifting the tail left; returns the new length. */
  private int delete(char[] s, int pos, int len) {
    if (pos < len) {
      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
    }

    return len - 1;
  }
}
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link FrenchMinimalStemmer} to stem French + * words. + *
+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class FrenchMinimalStemFilter extends TokenFilter { + private final FrenchMinimalStemmer stemmer = new FrenchMinimalStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public FrenchMinimalStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\fr\FrenchMinimalStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.fr; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link FrenchLightStemmer} to stem French + * words. + *+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class FrenchLightStemFilter extends TokenFilter { + private final FrenchLightStemmer stemmer = new FrenchLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public FrenchLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\fr\FrenchLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java (revision 0) @@ -0,0 +1,240 @@ +package org.apache.lucene.analysis.hu; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/**
 * Light Stemmer for Hungarian.
 * <p>
 * Works in place on a character buffer: accented vowels are folded first,
 * then case endings, possessive endings and the plural marker are removed in
 * that order, and finally a trailing vowel is trimmed. Each step only runs
 * its rules when the term is long enough for that rule.
 */
public class HungarianLightStemmer {

  /** Three-letter case endings removed from terms longer than five characters. */
  private static final String[] CASE_SUFFIXES_3 = {
    "nak", "nek", "val", "vel", "ert", "rol", "ban",
    "ben", "bol", "nal", "nel", "hoz", "hez", "tol"
  };

  /** Two-letter case endings removed from terms longer than four characters. */
  private static final String[] CASE_SUFFIXES_2 = {
    "at", "et", "ot", "va", "ve", "ra", "re", "ba", "be", "ul", "ig"
  };

  /** Four-letter possessive endings that must follow a non-vowel. */
  private static final String[] POSSESSIVE_4_AFTER_CONSONANT = { "atok", "otok", "etek" };

  /** Three-letter possessive endings that must follow a non-vowel. */
  private static final String[] POSSESSIVE_3_AFTER_CONSONANT = { "unk", "tok", "tek" };

  /** Two-letter possessive endings that must follow a non-vowel. */
  private static final String[] POSSESSIVE_2_AFTER_CONSONANT = {
    "am", "em", "om", "ad", "ed", "od", "uk"
  };

  /** Two-letter possessive endings that must follow a vowel. */
  private static final String[] POSSESSIVE_2_AFTER_VOWEL = { "nk", "ja", "je" };

  /** Two-letter possessive endings removed regardless of the preceding letter. */
  private static final String[] POSSESSIVE_2_ANY = { "im", "id", "ik" };

  /**
   * Stems the first {@code len} characters of {@code s}.
   *
   * @param s   buffer holding the term; accented vowels are folded in place
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed term
   */
  public int stem(char s[], int len) {
    for (int k = 0; k < len; k++) {
      s[k] = fold(s[k]);
    }

    int remaining = removeCase(s, len);
    remaining = removePossessive(s, remaining);
    remaining = removePlural(s, remaining);
    return normalize(s, remaining);
  }

  /** Maps an accented Hungarian vowel to its base letter; other characters are returned as-is. */
  private static char fold(char c) {
    switch (c) {
      case '\u00e1': // a with acute
        return 'a';
      case '\u00eb': // e with diaeresis
      case '\u00e9': // e with acute
        return 'e';
      case '\u00ed': // i with acute
        return 'i';
      case '\u00f3': // o with acute
      case '\u0151': // o with double acute
      case '\u00f6': // o with diaeresis
        return 'o';
      case '\u00fa': // u with acute
      case '\u0171': // u with double acute
      case '\u00fc': // u with diaeresis
        return 'u';
      default:
        return c;
    }
  }

  /** Removes a grammatical case ending, if one of the rules applies. */
  private int removeCase(char s[], int len) {
    if (len > 6 && endsWith(s, len, "kent")) {
      return len - 4;
    }

    if (len > 5) {
      if (endsWithAny(s, len, CASE_SUFFIXES_3)) {
        return len - 3;
      }

      // "-al"/"-el" after a doubled consonant: drop the ending and one consonant.
      final boolean alOrEl = endsWith(s, len, "al") || endsWith(s, len, "el");
      if (alOrEl && !isVowel(s[len - 3]) && s[len - 3] == s[len - 4]) {
        return len - 3;
      }
    }

    if (len > 4) {
      if (endsWithAny(s, len, CASE_SUFFIXES_2)) {
        return len - 2;
      }

      final boolean onOrEn = endsWith(s, len, "on") || endsWith(s, len, "en");
      if (onOrEn && !isVowel(s[len - 3])) {
        return len - 2;
      }

      final char last = s[len - 1];
      if (last == 't' || last == 'n') {
        return len - 1;
      }
      // "-a"/"-e" after a doubled consonant: drop the vowel and one consonant.
      if ((last == 'a' || last == 'e')
          && s[len - 2] == s[len - 3]
          && !isVowel(s[len - 2])) {
        return len - 2;
      }
    }

    return len;
  }

  /** Removes a possessive ending, if one of the rules applies. */
  private int removePossessive(char s[], int len) {
    if (len > 6) {
      if (!isVowel(s[len - 5]) && endsWithAny(s, len, POSSESSIVE_4_AFTER_CONSONANT)) {
        return len - 4;
      }
      if (endsWith(s, len, "itek") || endsWith(s, len, "itok")) {
        return len - 4;
      }
    }

    if (len > 5) {
      final boolean vowelBefore = isVowel(s[len - 4]);
      if (!vowelBefore && endsWithAny(s, len, POSSESSIVE_3_AFTER_CONSONANT)) {
        return len - 3;
      }
      if (vowelBefore && endsWith(s, len, "juk")) {
        return len - 3;
      }
      if (endsWith(s, len, "ink")) {
        return len - 3;
      }
    }

    if (len > 4) {
      final boolean vowelBefore = isVowel(s[len - 3]);
      if (!vowelBefore && endsWithAny(s, len, POSSESSIVE_2_AFTER_CONSONANT)) {
        return len - 2;
      }
      if (vowelBefore && endsWithAny(s, len, POSSESSIVE_2_AFTER_VOWEL)) {
        return len - 2;
      }
      if (endsWithAny(s, len, POSSESSIVE_2_ANY)) {
        return len - 2;
      }
    }

    if (len > 3) {
      final char last = s[len - 1];
      final boolean vowelBeforeLast = isVowel(s[len - 2]);
      if ((last == 'a' || last == 'e') && !vowelBeforeLast) {
        return len - 1;
      }
      if (last == 'd' && vowelBeforeLast) {
        return len - 1;
      }
      if (last == 'i') {
        return len - 1;
      }
    }

    return len;
  }

  /** Removes the plural marker "k", together with a preceding a/o/e when the term is long enough. */
  private int removePlural(char s[], int len) {
    if (len <= 3 || s[len - 1] != 'k') {
      return len;
    }
    final char beforeK = s[len - 2];
    final boolean linkingVowel = beforeK == 'a' || beforeK == 'o' || beforeK == 'e';
    // For short terms (len == 4) only the "k" itself is removed.
    return (linkingVowel && len > 4) ? len - 2 : len - 1;
  }

  /** Trims a single trailing a/e/i/o from terms longer than three characters. */
  private int normalize(char s[], int len) {
    if (len > 3) {
      final char last = s[len - 1];
      if (last == 'a' || last == 'e' || last == 'i' || last == 'o') {
        return len - 1;
      }
    }
    return len;
  }

  /** Returns true for a, e, i, o, u and y. */
  private boolean isVowel(char ch) {
    return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u' || ch == 'y';
  }

  /** Returns true if the first {@code len} characters of {@code s} end with any of {@code suffixes}. */
  private boolean endsWithAny(char s[], int len, String[] suffixes) {
    for (String candidate : suffixes) {
      if (endsWith(s, len, candidate)) {
        return true;
      }
    }
    return false;
  }

  /** Returns true if the first {@code len} characters of {@code s} end with {@code suffix}. */
  private boolean endsWith(char s[], int len, String suffix) {
    final int offset = len - suffix.length();
    if (offset < 0) {
      return false;
    }
    for (int k = 0; k < suffix.length(); k++) {
      if (s[offset + k] != suffix.charAt(k)) {
        return false;
      }
    }
    return true;
  }
}
+ */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link HungarianLightStemmer} to stem + * Hungarian words. + *+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class HungarianLightStemFilter extends TokenFilter { + private final HungarianLightStemmer stemmer = new HungarianLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public HungarianLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\hu\HungarianLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.analysis.pt; + +import java.util.Arrays; + +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Minimal Stemmer for Portuguese + *+ * This follows the "RSLP-S" algorithm presented in: + * A study on the Use of Stemming for Monolingual Ad-Hoc Portuguese + * Information Retrieval (Orengo, et al) + * which is just the plural reduction step of the RSLP + * algorithm from A Stemming Algorithmm for the Portuguese Language, + * Orengo et al. + */ +public class PortugueseMinimalStemmer { + + private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31, + Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois", + "depois","dois","leis"), + false); + + private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31, + Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos", + "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés", + "através", "convés", "ês", "país", "após", "ambas", "ambos", + "messias", "depois"), + false); + + public int stem(char s[], int len) { + if (len < 3 || s[len-1] != 's') + return len; + + if (s[len-2] == 'n') { + len--; + s[len-1] = 'm'; + return len; + } + + if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') { + len--; + s[len-2] = 'ã'; + s[len-1] = 'o'; + return len; + } + + if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e') + if (!(len == 4 && s[0] == 'm')) { + len--; + s[len-1] = 'o'; + return len; + } + + if (len >= 4 && s[len-2] == 'i') { + if (s[len-3] == 'a') + if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) { + len--; + s[len-1] = 'l'; + return len; + } + + if (len >= 5 && s[len-3] == 'é') { + len--; + s[len-2] = 'e'; + s[len-1] = 'l'; 
+ return len; + } + + if (len >= 5 && s[len-3] == 'e') { + len--; + s[len-1] = 'l'; + return len; + } + + if (len >= 5 && s[len-3] == 'ó') { + len--; + s[len-2] = 'o'; + s[len-1] = 'l'; + return len; + } + + if (!excIS.contains(s, 0, len)) { + s[len-1] = 'l'; + return len; + } + } + + if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e') + return len - 2; + + if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e') + if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o')) + return len - 2; + + if (excS.contains(s, 0, len)) + return len; + else + return len-1; + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\pt\PortugueseMinimalStemmer.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java (revision 0) @@ -0,0 +1,212 @@ +package org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
/**
 * Light Stemmer for Portuguese
 */
public class PortugueseLightStemmer {

  /**
   * Stems the word held in {@code s}, rewriting the buffer in place.
   * Words shorter than four characters are returned unchanged.
   *
   * @param s   buffer holding the word; may be modified
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed word
   */
  public int stem(char[] s, int len) {
    if (len < 4) {
      return len;
    }

    len = removeSuffix(s, len);

    if (len > 3 && s[len - 1] == 'a') {
      len = normFeminine(s, len);
    }

    // drop a single trailing vowel from words that are still long enough
    if (len > 4) {
      final char last = s[len - 1];
      if (last == 'e' || last == 'a' || last == 'o') {
        len--;
      }
    }

    // finally strip diacritics from what is left of the word
    for (int i = 0; i < len; i++) {
      s[i] = foldAccent(s[i]);
    }

    return len;
  }

  /** Maps an accented vowel or cedilla to its plain letter; other characters are returned as-is. */
  private static char foldAccent(char ch) {
    switch (ch) {
      case 'à':
      case 'á':
      case 'â':
      case 'ä':
      case 'ã':
        return 'a';
      case 'ò':
      case 'ó':
      case 'ô':
      case 'ö':
      case 'õ':
        return 'o';
      case 'è':
      case 'é':
      case 'ê':
      case 'ë':
        return 'e';
      case 'ù':
      case 'ú':
      case 'û':
      case 'ü':
        return 'u';
      case 'ì':
      case 'í':
      case 'î':
      case 'ï':
        return 'i';
      case 'ç':
        return 'c';
      default:
        return ch;
    }
  }

  /** Removes or rewrites a plural / adverbial ending; the first matching rule wins. */
  private int removeSuffix(char[] s, int len) {
    // -res, -ses, -les, -zes => drop "es"
    if (len > 4 && endsWith(s, len, "es")) {
      final char before = s[len - 3];
      if (before == 'r' || before == 's' || before == 'l' || before == 'z') {
        return len - 2;
      }
    }

    // -ns => -m
    if (len > 3 && endsWith(s, len, "ns")) {
      s[len - 2] = 'm';
      return len - 1;
    }

    if (len > 4) {
      // -eis, -éis => -el
      if (endsWith(s, len, "eis") || endsWith(s, len, "éis")) {
        s[len - 3] = 'e';
        s[len - 2] = 'l';
        return len - 1;
      }

      // -ais => -al
      if (endsWith(s, len, "ais")) {
        s[len - 2] = 'l';
        return len - 1;
      }

      // -óis => -ol
      if (endsWith(s, len, "óis")) {
        s[len - 3] = 'o';
        s[len - 2] = 'l';
        return len - 1;
      }

      // -is => -il
      if (endsWith(s, len, "is")) {
        s[len - 1] = 'l';
        return len;
      }
    }

    // -ões, -ães => -ão
    if (len > 3 && (endsWith(s, len, "ões") || endsWith(s, len, "ães"))) {
      s[len - 3] = 'ã';
      s[len - 2] = 'o';
      return len - 1;
    }

    // -mente => removed
    if (len > 6 && endsWith(s, len, "mente")) {
      return len - 5;
    }

    // plain -s => removed
    if (len > 3 && s[len - 1] == 's') {
      return len - 1;
    }

    return len;
  }

  /** Rewrites a feminine ending to its masculine counterpart; only words longer than six characters are touched. */
  private int normFeminine(char[] s, int len) {
    if (len <= 6) {
      return len;
    }

    // four-letter endings need a longer word
    if (len > 7
        && (endsWith(s, len, "inha")
            || endsWith(s, len, "iaca")
            || endsWith(s, len, "eira"))) {
      s[len - 1] = 'o';
      return len;
    }

    final boolean swapFinalVowel =
        endsWith(s, len, "osa")
        || endsWith(s, len, "ica")
        || endsWith(s, len, "ida")
        || endsWith(s, len, "ada")
        || endsWith(s, len, "iva")
        || endsWith(s, len, "ama");
    if (swapFinalVowel) {
      s[len - 1] = 'o';
      return len;
    }

    // -ona => -ão
    if (endsWith(s, len, "ona")) {
      s[len - 3] = 'ã';
      s[len - 2] = 'o';
      return len - 1;
    }

    // -ora => -or
    if (endsWith(s, len, "ora")) {
      return len - 1;
    }

    // -esa => -ês
    if (endsWith(s, len, "esa")) {
      s[len - 3] = 'ê';
      return len - 1;
    }

    // -na => -no
    if (endsWith(s, len, "na")) {
      s[len - 1] = 'o';
      return len;
    }

    return len;
  }

  /** Returns true if the first {@code len} characters of {@code s} end with {@code suffix}. */
  private boolean endsWith(char[] s, int len, String suffix) {
    final int suffixLen = suffix.length();
    if (suffixLen > len) {
      return false;
    }

    final int offset = len - suffixLen;
    for (int i = 0; i < suffixLen; i++) {
      if (s[offset + i] != suffix.charAt(i)) {
        return false;
      }
    }
    return true;
  }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link PortugueseMinimalStemmer} to stem + * Portuguese words. + *
+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class PortugueseMinimalStemFilter extends TokenFilter { + private final PortugueseMinimalStemmer stemmer = new PortugueseMinimalStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public PortugueseMinimalStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\pt\PortugueseMinimalStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link PortugueseLightStemmer} to stem + * Portuguese words. + *+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class PortugueseLightStemFilter extends TokenFilter { + private final PortugueseLightStemmer stemmer = new PortugueseLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public PortugueseLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\pt\PortugueseLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java (revision 0) @@ -0,0 +1,163 @@ +package org.apache.lucene.analysis.ru; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/**
 * Light Stemmer for Russian.
 * <p>
 * This stemmer implements the following algorithm:
 * <i>Indexing and Searching Strategies for the Russian Language.</i>
 * Ljiljana Dolamic and Jacques Savoy.
 */
public class RussianLightStemmer {

  /** Four-letter case endings; tried first, on words longer than six characters. */
  private static final String[] CASE_SUFFIXES_4 = {
    "иями", "оями"
  };

  /** Three-letter case endings; tried on words longer than five characters. */
  private static final String[] CASE_SUFFIXES_3 = {
    "иям", "иях", "оях", "ями", "оям", "оьв", "ами", "его",
    "ему", "ери", "ими", "ого", "ому", "ыми", "оев"
  };

  /** Two-letter case endings; tried on words longer than four characters. */
  private static final String[] CASE_SUFFIXES_2 = {
    "ая", "яя", "ях", "юю", "ах", "ею", "их", "ия", "ию", "ьв",
    "ою", "ую", "ям", "ых", "ея", "ам", "ем", "ей", "ём", "ев",
    "ий", "им", "ое", "ой", "ом", "ов", "ые", "ый", "ым", "ми"
  };

  /**
   * Stems the word held in {@code s}: a case ending is removed first, then the
   * remaining stem is normalized. The buffer contents are never rewritten; only
   * the returned length changes.
   *
   * @param s   buffer holding the word
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed word
   */
  public int stem(char[] s, int len) {
    len = removeCase(s, len);
    return normalize(s, len);
  }

  /**
   * Drops a trailing soft sign or 'и', and collapses a final double 'н' to a
   * single one. Only applied to stems longer than three characters.
   */
  private int normalize(char[] s, int len) {
    if (len > 3) {
      switch (s[len - 1]) {
        case 'ь':
        case 'и':
          return len - 1;
        case 'н':
          if (s[len - 2] == 'н') {
            return len - 1;
          }
          break;
        default:
          break;
      }
    }
    return len;
  }

  /**
   * Removes the longest matching case ending. Each rule requires the word to be
   * more than three characters longer than the ending it strips, so at least
   * three characters of the word always remain.
   */
  private int removeCase(char[] s, int len) {
    // The guard must be "len > 6": with "len < 6" a four- or five-letter word
    // would be reduced to zero or one characters, and longer words would never
    // reach this rule at all.
    if (len > 6 && endsWithAny(s, len, CASE_SUFFIXES_4)) {
      return len - 4;
    }

    if (len > 5 && endsWithAny(s, len, CASE_SUFFIXES_3)) {
      return len - 3;
    }

    if (len > 4 && endsWithAny(s, len, CASE_SUFFIXES_2)) {
      return len - 2;
    }

    // single-letter endings
    if (len > 3) {
      switch (s[len - 1]) {
        case 'а':
        case 'е':
        case 'и':
        case 'о':
        case 'у':
        case 'й':
        case 'ы':
        case 'я':
        case 'ь':
          return len - 1;
        default:
          break;
      }
    }

    return len;
  }

  /** Returns true if the word ends with at least one of {@code suffixes}. */
  private boolean endsWithAny(char[] s, int len, String[] suffixes) {
    for (String suffix : suffixes) {
      if (endsWith(s, len, suffix)) {
        return true;
      }
    }
    return false;
  }

  /** Returns true if the first {@code len} characters of {@code s} end with {@code suffix}. */
  private boolean endsWith(char[] s, int len, String suffix) {
    int suffixLen = suffix.length();
    if (suffixLen > len) {
      return false;
    }

    for (int i = suffixLen - 1; i >= 0; i--) {
      if (s[len - (suffixLen - i)] != suffix.charAt(i)) {
        return false;
      }
    }

    return true;
  }
}
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link RussianLightStemmer} to stem Russian + * words. + *
+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class RussianLightStemFilter extends TokenFilter { + private final RussianLightStemmer stemmer = new RussianLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public RussianLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\ru\RussianLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.en; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link EnglishMinimalStemmer} to stem + * English words. + *+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class EnglishMinimalStemFilter extends TokenFilter { + private final EnglishMinimalStemmer stemmer = new EnglishMinimalStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public EnglishMinimalStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\en\EnglishMinimalStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java (revision 0) @@ -0,0 +1,45 @@ +package org.apache.lucene.analysis.en; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * Minimal plural stemmer for English.
 * <p>
 * This stemmer implements the "S-Stemmer" from
 * <i>How Effective Is Suffixing?</i>
 * Donna Harman.
 */
public class EnglishMinimalStemmer {

  /**
   * Strips a plural ending from the word held in {@code s}, rewriting the
   * buffer in place where a letter has to change ("-ies" to "-y").
   *
   * @param s   buffer holding the word; may be modified
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed word
   */
  public int stem(char[] s, int len) {
    // only words of at least three characters ending in 's' are candidates
    if (len < 3 || s[len - 1] != 's') {
      return len;
    }

    final char penult = s[len - 2];

    // "-us" and "-ss" are left alone
    if (penult == 'u' || penult == 's') {
      return len;
    }

    if (penult == 'e') {
      final char third = s[len - 3];

      // "-ies" => "-y", unless preceded by 'a' or 'e'
      if (len > 3 && third == 'i' && s[len - 4] != 'a' && s[len - 4] != 'e') {
        s[len - 3] = 'y';
        return len - 2;
      }

      // remaining "-ies", and "-aes", "-oes", "-ees" are left alone
      if (third == 'i' || third == 'a' || third == 'o' || third == 'e') {
        return len;
      }
    }

    // everything else simply loses the final 's'
    return len - 1;
  }
}
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/**
 * Light Stemmer for Spanish.
 * <p>
 * Operates in place on a character buffer: accented vowels are first folded
 * to their unaccented form, then a small set of word endings is stripped.
 * Words shorter than five characters are returned untouched (not even folded).
 */
public class SpanishLightStemmer {

  /**
   * Stems the word stored in the first {@code len} characters of {@code s}.
   * The buffer is modified in place.
   *
   * @param s   buffer holding the word
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed word
   */
  public int stem(char s[], int len) {
    if (len < 5) {
      return len;
    }

    // Fold accented vowels. The literals are written as Unicode escapes so
    // the class compiles the same way whatever the source-file encoding is.
    for (int i = 0; i < len; i++) {
      switch (s[i]) {
        case '\u00E0': // a with grave
        case '\u00E1': // a with acute
        case '\u00E2': // a with circumflex
        case '\u00E4': // a with diaeresis
          s[i] = 'a';
          break;
        case '\u00F2': // o with grave
        case '\u00F3': // o with acute
        case '\u00F4': // o with circumflex
        case '\u00F6': // o with diaeresis
          s[i] = 'o';
          break;
        case '\u00E8': // e with grave
        case '\u00E9': // e with acute
        case '\u00EA': // e with circumflex
        case '\u00EB': // e with diaeresis
          s[i] = 'e';
          break;
        case '\u00F9': // u with grave
        case '\u00FA': // u with acute
        case '\u00FB': // u with circumflex
        case '\u00FC': // u with diaeresis
          s[i] = 'u';
          break;
        case '\u00EC': // i with grave
        case '\u00ED': // i with acute
        case '\u00EE': // i with circumflex
        case '\u00EF': // i with diaeresis
          s[i] = 'i';
          break;
        default:
          break;
      }
    }

    switch (s[len - 1]) {
      case 'o':
      case 'a':
      case 'e':
        // Drop a final vowel.
        return len - 1;
      case 's':
        if (s[len - 2] == 'e' && s[len - 3] == 'c') {
          // "-ces" -> "-z": the 'c' becomes 'z' and BOTH trailing characters
          // ("es") must go, hence len - 2 (returning len - 1 would leave "-ze").
          s[len - 3] = 'z';
          return len - 2;
        }
        if (s[len - 2] == 'o' || s[len - 2] == 'a' || s[len - 2] == 'e') {
          // "-os", "-as", "-es": drop the plural ending.
          return len - 2;
        }
        return len;
      default:
        return len;
    }
  }
}
org.apache.lucene.analysis.es; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link SpanishLightStemmer} to stem Spanish + * words. + *
+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class SpanishLightStemFilter extends TokenFilter { + private final SpanishLightStemmer stemmer = new SpanishLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public SpanishLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\es\SpanishLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java (revision 0) @@ -0,0 +1,113 @@ +package org.apache.lucene.analysis.it; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This algorithm is updated based on code located at: + * http://members.unine.ch/jacques.savoy/clef/ + * + * Full copyright for that code follows: + */ + +/* + * Copyright (c) 2005, Jacques Savoy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. Neither the name of the author nor the names + * of its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/**
 * Light Stemmer for Italian.
 * <p>
 * Operates in place on a character buffer: accented vowels are first folded
 * to their unaccented form, then a final vowel is stripped, together with a
 * preceding 'i' (or 'h' for the 'e' and 'i' endings) when present.
 * Words shorter than six characters are returned untouched (not even folded).
 */
public class ItalianLightStemmer {

  /**
   * Stems the word stored in the first {@code len} characters of {@code s}.
   * The buffer is modified in place by the accent folding step.
   *
   * @param s   buffer holding the word
   * @param len number of valid characters in {@code s}
   * @return the length of the stemmed word
   */
  public int stem(char s[], int len) {
    if (len < 6) {
      return len;
    }

    // Fold accented vowels. The literals are written as Unicode escapes so
    // the class compiles the same way whatever the source-file encoding is.
    for (int i = 0; i < len; i++) {
      switch (s[i]) {
        case '\u00E0': // a with grave
        case '\u00E1': // a with acute
        case '\u00E2': // a with circumflex
        case '\u00E4': // a with diaeresis
          s[i] = 'a';
          break;
        case '\u00F2': // o with grave
        case '\u00F3': // o with acute
        case '\u00F4': // o with circumflex
        case '\u00F6': // o with diaeresis
          s[i] = 'o';
          break;
        case '\u00E8': // e with grave
        case '\u00E9': // e with acute
        case '\u00EA': // e with circumflex
        case '\u00EB': // e with diaeresis
          s[i] = 'e';
          break;
        case '\u00F9': // u with grave
        case '\u00FA': // u with acute
        case '\u00FB': // u with circumflex
        case '\u00FC': // u with diaeresis
          s[i] = 'u';
          break;
        case '\u00EC': // i with grave
        case '\u00ED': // i with acute
        case '\u00EE': // i with circumflex
        case '\u00EF': // i with diaeresis
          s[i] = 'i';
          break;
        default:
          break;
      }
    }

    final char last = s[len - 1];
    final char previous = s[len - 2];

    switch (last) {
      case 'e':
        // "-ie", "-he" lose two characters, any other "-e" loses one.
        return (previous == 'i' || previous == 'h') ? len - 2 : len - 1;
      case 'i':
        // "-hi", "-ii" lose two characters, any other "-i" loses one.
        return (previous == 'h' || previous == 'i') ? len - 2 : len - 1;
      case 'a':
        // "-ia" loses two characters, any other "-a" loses one.
        return (previous == 'i') ? len - 2 : len - 1;
      case 'o':
        // "-io" loses two characters, any other "-o" loses one. The check
        // must look at the character BEFORE the 'o'; testing the last
        // character itself can never match 'i' inside this case.
        return (previous == 'i') ? len - 2 : len - 1;
      default:
        return len;
    }
  }
}
modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.it; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link ItalianLightStemmer} to stem Italian + * words. + *+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ */ +public final class ItalianLightStemFilter extends TokenFilter { + private final ItalianLightStemmer stemmer = new ItalianLightStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public ItalianLightStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\it\ItalianLightStemFilter.java ___________________________________________________________________ Added: svn:eol-style + native