Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java (revision 0) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java (working copy) @@ -0,0 +1,63 @@ +package org.apache.lucene.analysis.ga; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.util.CharArraySet; + +public class TestIrishAnalyzer extends BaseTokenStreamTestCase { + /** This test fails with NPE when the + * stopwords file is missing in classpath */ + public void testResourcesAvailable() { + new IrishAnalyzer(TEST_VERSION_CURRENT); + } + + /** test stopwords and stemming */ + public void testBasics() throws IOException { + Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT); + // stemming + checkOneTermReuse(a, "siopadóireacht", "siopadóir"); + checkOneTermReuse(a, "síceapatacha", "síceapaite"); + // stopword + assertAnalyzesTo(a, "le", new String[] { }); + } + + /** test use of elisionfilter */ + public void testContractions() throws IOException { + Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(a, "b'fhearr m'athair", + new String[] { "fearr", "athair" }); + } + + /** test use of exclusion set */ + public void testExclude() throws IOException { + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("feirmeoireacht"), false); + Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT, + IrishAnalyzer.getDefaultStopSet(), exclusionSet); + checkOneTermReuse(a, "feirmeoireacht", "feirmeoireacht"); + checkOneTermReuse(a, "siopadóireacht", "siopadóir"); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new IrishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } +} Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java (revision 0) +++ 
modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java (working copy) @@ -0,0 +1,41 @@ +package org.apache.lucene.analysis.ga; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.StringReader; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; + +/** + * Test the Irish lowercase filter. 
+ */ +public class TestIrishLowerCaseFilter extends BaseTokenStreamTestCase { + + /** + * Test lowercase + */ + public void testIrishLowerCaseFilter() throws Exception { + TokenStream stream = new MockTokenizer(new StringReader( + "nAthair tUISCE hARD"), MockTokenizer.WHITESPACE, false); + IrishLowerCaseFilter filter = new IrishLowerCaseFilter(stream); + assertTokenStreamContents(filter, new String[] {"n-athair", "t-uisce", + "hard",}); + } +} Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (working copy) @@ -0,0 +1,140 @@ +package org.apache.lucene.analysis.ga; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.util.Arrays; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.fr.ElisionFilter; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.snowball.SnowballFilter; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; +import org.tartarus.snowball.ext.IrishStemmer; + +/** + * {@link Analyzer} for Irish. + */ +public final class IrishAnalyzer extends StopwordAnalyzerBase { + private final CharArraySet stemExclusionSet; + + /** File containing default Irish stopwords. */ + public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; + + private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( + new CharArraySet(Version.LUCENE_CURRENT, + Arrays.asList( + "d", "m", "b" + ), true)); + + /** + * Returns an unmodifiable instance of the default stop words set. + * @return default stop words set. 
+ */ + public static CharArraySet getDefaultStopSet(){ + return DefaultSetHolder.DEFAULT_STOP_SET; + } + + /** + * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class + * accesses the static final set the first time. + */ + private static class DefaultSetHolder { + static final CharArraySet DEFAULT_STOP_SET; + + static { + try { + DEFAULT_STOP_SET = loadStopwordSet(false, + IrishAnalyzer.class, DEFAULT_STOPWORD_FILE, "#"); + } catch (IOException ex) { + // default set should always be present as it is part of the + // distribution (JAR); chain the cause so a broken build is diagnosable + throw new RuntimeException("Unable to load default stopword set", ex); + } + } + } + + /** + * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. + */ + public IrishAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + } + + /** + * Builds an analyzer with the given stop words. + * + * @param matchVersion lucene compatibility version + * @param stopwords a stopword set + */ + public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + } + + /** + * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is + * provided this analyzer will add a {@link KeywordMarkerFilter} before + * stemming. + * + * @param matchVersion lucene compatibility version + * @param stopwords a stopword set + * @param stemExclusionSet a set of terms not to be stemmed + */ + public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); + } + + /** + * Creates a + * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} + * which tokenizes all the text in the provided {@link Reader}. 
+ * + * @return A + * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} + * built from an {@link StandardTokenizer} filtered with + * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} + * , {@link KeywordMarkerFilter} if a stem exclusion set is + * provided and {@link SnowballFilter}. + */ + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + final Tokenizer source = new StandardTokenizer(matchVersion, reader); + TokenStream result = new StandardFilter(matchVersion, source); + if (matchVersion.onOrAfter(Version.LUCENE_36)) { + result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES); + } + result = new IrishLowerCaseFilter(result); + result = new StopFilter(matchVersion, result, stopwords); + if(!stemExclusionSet.isEmpty()) + result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SnowballFilter(result, new IrishStemmer()); + return new TokenStreamComponents(source, result); + } +} Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java (working copy) @@ -0,0 +1,85 @@ +package org.apache.lucene.analysis.ga; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +/** + * Normalises token text to lower case, handling t-prothesis + * and n-eclipsis (i.e., that 'nAthair' should become 'n-athair') + */ +public final class IrishLowerCaseFilter extends TokenFilter { + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** + * Create an IrishLowerCaseFilter that normalises Irish token text. + */ + public IrishLowerCaseFilter(TokenStream in) { + super(in); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + char[] chArray = termAtt.buffer(); + int chLen = termAtt.length(); + int idx = 0; + + if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1])) { + chArray = termAtt.resizeBuffer(chLen + 1); + for (int i = chLen; i > 1; i--) { + chArray[i] = chArray[i - 1]; + } + chArray[1] = '-'; + termAtt.setLength(chLen + 1); + idx = 2; + chLen = chLen + 1; + } + + for (int i = idx; i < chLen;) { + i += Character.toChars(Character.toLowerCase(chArray[i]), chArray, i); + } + return true; + } else { + return false; + } + } + + private boolean isUpperVowel (int v) { + switch (v) { + case 'A': + case 'E': + case 'I': + case 'O': + case 'U': + // vowels with acute accent (fada) + case '\u00c1': + case '\u00c9': + case '\u00cd': + case '\u00d3': + case '\u00da': + return true; + default: + return false; + } + } +} 
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html (working copy) @@ -0,0 +1,22 @@ + + + + +Analysis for Irish. + + Index: modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java =================================================================== --- modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java (revision 0) +++ modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java (working copy) @@ -0,0 +1,587 @@ +// This file was generated automatically by the Snowball to Java compiler + +package org.tartarus.snowball.ext; + +import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; + + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + +public class IrishStemmer extends SnowballProgram { + +private static final long serialVersionUID = 1L; + + private final static IrishStemmer methodObject = new IrishStemmer (); + + private final static Among a_0[] = { + new Among ( "b'", -1, 4, "", methodObject ), + new Among ( "bh", -1, 14, "", methodObject ), + new Among ( "bhf", 1, 9, "", methodObject ), + new Among ( "bp", -1, 11, "", methodObject ), + new Among ( "ch", -1, 15, "", methodObject ), + new Among ( "d'", -1, 2, "", methodObject ), + new Among ( "d'fh", 5, 3, "", methodObject ), + new Among ( "dh", -1, 16, "", methodObject ), + new Among ( "dt", -1, 13, "", methodObject ), + new Among ( "fh", -1, 17, "", methodObject ), + new Among ( "gc", -1, 7, "", methodObject ), + new Among ( "gh", -1, 18, "", methodObject ), + new Among ( "h-", -1, 1, "", methodObject ), + new Among ( "m'", -1, 4, "", methodObject ), + new Among ( "mb", -1, 6, "", methodObject ), + new Among ( "mh", -1, 19, "", methodObject ), + new Among ( "n-", -1, 1, "", methodObject ), + new Among ( "nd", -1, 8, "", methodObject ), + new Among ( "ng", -1, 10, "", methodObject ), + new Among ( "ph", -1, 20, "", methodObject ), + new Among ( "sh", -1, 5, "", methodObject ), + new Among ( "t-", -1, 1, "", methodObject ), + new Among ( "th", -1, 21, "", methodObject ), + new Among ( "ts", -1, 12, "", methodObject ) + }; + + private final static Among a_1[] = { + new Among ( "\u00EDochta", -1, 1, "", methodObject ), + new Among ( "a\u00EDochta", 0, 1, "", methodObject ), + new Among ( "ire", -1, 2, "", methodObject ), + new Among ( "aire", 2, 2, "", methodObject ), + new Among ( "abh", -1, 1, "", methodObject ), + new Among ( "eabh", 4, 1, "", methodObject ), + new Among ( "ibh", -1, 1, "", methodObject ), + new Among ( "aibh", 6, 1, "", methodObject ), + new Among ( "amh", -1, 1, "", methodObject ), + new Among ( "eamh", 8, 1, "", methodObject ), + new Among ( "imh", -1, 1, "", methodObject ), + new Among ( "aimh", 10, 1, "", 
methodObject ), + new Among ( "\u00EDocht", -1, 1, "", methodObject ), + new Among ( "a\u00EDocht", 12, 1, "", methodObject ), + new Among ( "ir\u00ED", -1, 2, "", methodObject ), + new Among ( "air\u00ED", 14, 2, "", methodObject ) + }; + + private final static Among a_2[] = { + new Among ( "\u00F3ideacha", -1, 6, "", methodObject ), + new Among ( "patacha", -1, 5, "", methodObject ), + new Among ( "achta", -1, 1, "", methodObject ), + new Among ( "arcachta", 2, 2, "", methodObject ), + new Among ( "eachta", 2, 1, "", methodObject ), + new Among ( "grafa\u00EDochta", -1, 4, "", methodObject ), + new Among ( "paite", -1, 5, "", methodObject ), + new Among ( "ach", -1, 1, "", methodObject ), + new Among ( "each", 7, 1, "", methodObject ), + new Among ( "\u00F3ideach", 8, 6, "", methodObject ), + new Among ( "gineach", 8, 3, "", methodObject ), + new Among ( "patach", 7, 5, "", methodObject ), + new Among ( "grafa\u00EDoch", -1, 4, "", methodObject ), + new Among ( "pataigh", -1, 5, "", methodObject ), + new Among ( "\u00F3idigh", -1, 6, "", methodObject ), + new Among ( "acht\u00FAil", -1, 1, "", methodObject ), + new Among ( "eacht\u00FAil", 15, 1, "", methodObject ), + new Among ( "gineas", -1, 3, "", methodObject ), + new Among ( "ginis", -1, 3, "", methodObject ), + new Among ( "acht", -1, 1, "", methodObject ), + new Among ( "arcacht", 19, 2, "", methodObject ), + new Among ( "eacht", 19, 1, "", methodObject ), + new Among ( "grafa\u00EDocht", -1, 4, "", methodObject ), + new Among ( "arcachta\u00ED", -1, 2, "", methodObject ), + new Among ( "grafa\u00EDochta\u00ED", -1, 4, "", methodObject ) + }; + + private final static Among a_3[] = { + new Among ( "imid", -1, 1, "", methodObject ), + new Among ( "aimid", 0, 1, "", methodObject ), + new Among ( "\u00EDmid", -1, 1, "", methodObject ), + new Among ( "a\u00EDmid", 2, 1, "", methodObject ), + new Among ( "adh", -1, 2, "", methodObject ), + new Among ( "eadh", 4, 2, "", methodObject ), + new Among ( "faidh", -1, 
1, "", methodObject ), + new Among ( "fidh", -1, 1, "", methodObject ), + new Among ( "\u00E1il", -1, 2, "", methodObject ), + new Among ( "ain", -1, 2, "", methodObject ), + new Among ( "tear", -1, 2, "", methodObject ), + new Among ( "tar", -1, 2, "", methodObject ) + }; + + private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 }; + + private int I_p2; + private int I_p1; + private int I_pV; + + private void copy_from(IrishStemmer other) { + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_pV = other.I_pV; + super.copy_from(other); + } + + private boolean r_mark_regions() { + int v_1; + int v_3; + // (, line 28 + I_pV = limit; + I_p1 = limit; + I_p2 = limit; + // do, line 34 + v_1 = cursor; + lab0: do { + // (, line 34 + // gopast, line 35 + golab1: while(true) + { + lab2: do { + if (!(in_grouping(g_v, 97, 250))) + { + break lab2; + } + break golab1; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + // setmark pV, line 35 + I_pV = cursor; + } while (false); + cursor = v_1; + // do, line 37 + v_3 = cursor; + lab3: do { + // (, line 37 + // gopast, line 38 + golab4: while(true) + { + lab5: do { + if (!(in_grouping(g_v, 97, 250))) + { + break lab5; + } + break golab4; + } while (false); + if (cursor >= limit) + { + break lab3; + } + cursor++; + } + // gopast, line 38 + golab6: while(true) + { + lab7: do { + if (!(out_grouping(g_v, 97, 250))) + { + break lab7; + } + break golab6; + } while (false); + if (cursor >= limit) + { + break lab3; + } + cursor++; + } + // setmark p1, line 38 + I_p1 = cursor; + // gopast, line 39 + golab8: while(true) + { + lab9: do { + if (!(in_grouping(g_v, 97, 250))) + { + break lab9; + } + break golab8; + } while (false); + if (cursor >= limit) + { + break lab3; + } + cursor++; + } + // gopast, line 39 + golab10: while(true) + { + lab11: do { + if (!(out_grouping(g_v, 97, 250))) + { + break lab11; + } + break golab10; + } while (false); + if (cursor >= limit) + { + 
break lab3; + } + cursor++; + } + // setmark p2, line 39 + I_p2 = cursor; + } while (false); + cursor = v_3; + return true; + } + + private boolean r_initial_morph() { + int among_var; + // (, line 43 + // [, line 44 + bra = cursor; + // substring, line 44 + among_var = find_among(a_0, 24); + if (among_var == 0) + { + return false; + } + // ], line 44 + ket = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 46 + // delete, line 46 + slice_del(); + break; + case 2: + // (, line 50 + // delete, line 50 + slice_del(); + break; + case 3: + // (, line 52 + // <-, line 52 + slice_from("f"); + break; + case 4: + // (, line 55 + // delete, line 55 + slice_del(); + break; + case 5: + // (, line 58 + // <-, line 58 + slice_from("s"); + break; + case 6: + // (, line 61 + // <-, line 61 + slice_from("b"); + break; + case 7: + // (, line 63 + // <-, line 63 + slice_from("c"); + break; + case 8: + // (, line 65 + // <-, line 65 + slice_from("d"); + break; + case 9: + // (, line 67 + // <-, line 67 + slice_from("f"); + break; + case 10: + // (, line 69 + // <-, line 69 + slice_from("g"); + break; + case 11: + // (, line 71 + // <-, line 71 + slice_from("p"); + break; + case 12: + // (, line 73 + // <-, line 73 + slice_from("s"); + break; + case 13: + // (, line 75 + // <-, line 75 + slice_from("t"); + break; + case 14: + // (, line 79 + // <-, line 79 + slice_from("b"); + break; + case 15: + // (, line 81 + // <-, line 81 + slice_from("c"); + break; + case 16: + // (, line 83 + // <-, line 83 + slice_from("d"); + break; + case 17: + // (, line 85 + // <-, line 85 + slice_from("f"); + break; + case 18: + // (, line 87 + // <-, line 87 + slice_from("g"); + break; + case 19: + // (, line 89 + // <-, line 89 + slice_from("m"); + break; + case 20: + // (, line 91 + // <-, line 91 + slice_from("p"); + break; + case 21: + // (, line 93 + // <-, line 93 + slice_from("t"); + break; + } + return true; + } + + private boolean r_RV() { + if (!(I_pV <= cursor)) + 
{ + return false; + } + return true; + } + + private boolean r_R1() { + if (!(I_p1 <= cursor)) + { + return false; + } + return true; + } + + private boolean r_R2() { + if (!(I_p2 <= cursor)) + { + return false; + } + return true; + } + + private boolean r_noun_sfx() { + int among_var; + // (, line 103 + // [, line 104 + ket = cursor; + // substring, line 104 + among_var = find_among_b(a_1, 16); + if (among_var == 0) + { + return false; + } + // ], line 104 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 108 + // call R1, line 108 + if (!r_R1()) + { + return false; + } + // delete, line 108 + slice_del(); + break; + case 2: + // (, line 110 + // call R2, line 110 + if (!r_R2()) + { + return false; + } + // delete, line 110 + slice_del(); + break; + } + return true; + } + + private boolean r_deriv() { + int among_var; + // (, line 113 + // [, line 114 + ket = cursor; + // substring, line 114 + among_var = find_among_b(a_2, 25); + if (among_var == 0) + { + return false; + } + // ], line 114 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 116 + // call R2, line 116 + if (!r_R2()) + { + return false; + } + // delete, line 116 + slice_del(); + break; + case 2: + // (, line 118 + // <-, line 118 + slice_from("arc"); + break; + case 3: + // (, line 120 + // <-, line 120 + slice_from("gin"); + break; + case 4: + // (, line 122 + // <-, line 122 + slice_from("graf"); + break; + case 5: + // (, line 124 + // <-, line 124 + slice_from("paite"); + break; + case 6: + // (, line 126 + // <-, line 126 + slice_from("\u00F3id"); + break; + } + return true; + } + + private boolean r_verb_sfx() { + int among_var; + // (, line 129 + // [, line 130 + ket = cursor; + // substring, line 130 + among_var = find_among_b(a_3, 12); + if (among_var == 0) + { + return false; + } + // ], line 130 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 133 + // call RV, line 133 + if 
(!r_RV()) + { + return false; + } + // delete, line 133 + slice_del(); + break; + case 2: + // (, line 138 + // call R1, line 138 + if (!r_R1()) + { + return false; + } + // delete, line 138 + slice_del(); + break; + } + return true; + } + + public boolean stem() { + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + // (, line 143 + // do, line 144 + v_1 = cursor; + lab0: do { + // call initial_morph, line 144 + if (!r_initial_morph()) + { + break lab0; + } + } while (false); + cursor = v_1; + // do, line 145 + v_2 = cursor; + lab1: do { + // call mark_regions, line 145 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 146 + limit_backward = cursor; cursor = limit; + // (, line 146 + // do, line 147 + v_3 = limit - cursor; + lab2: do { + // call noun_sfx, line 147 + if (!r_noun_sfx()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 148 + v_4 = limit - cursor; + lab3: do { + // call deriv, line 148 + if (!r_deriv()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // do, line 149 + v_5 = limit - cursor; + lab4: do { + // call verb_sfx, line 149 + if (!r_verb_sfx()) + { + break lab4; + } + } while (false); + cursor = limit - v_5; + cursor = limit_backward; return true; + } + + public boolean equals( Object o ) { + return o instanceof IrishStemmer; + } + + public int hashCode() { + return IrishStemmer.class.getName().hashCode(); + } + + + +} + Index: modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt =================================================================== --- modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt (revision 0) +++ modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt (working copy) @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' 
+daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár