Index: contrib/lexicon/lib/commons-lang-2.3.jar
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: contrib/lexicon/lib/commons-lang-2.3.jar
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Index: contrib/lexicon/src/test/org/apache/lucene/lexicon/TestLexicon.java
===================================================================
--- contrib/lexicon/src/test/org/apache/lucene/lexicon/TestLexicon.java (revision 0)
+++ contrib/lexicon/src/test/org/apache/lucene/lexicon/TestLexicon.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.lexicon;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.aphone.AphoneEn;
+import org.apache.lucene.lexicon.analyzer.AphoneAnalyser;
+import org.apache.lucene.lexicon.analyzer.NGramAnalyzer;
+import org.apache.lucene.lexicon.analyzer.SimilarAble;
+import org.apache.lucene.lexicon.analyzer.StemmerAnalyzer;
+import org.apache.lucene.lexicon.reader.DirectoryReader;
+import org.apache.lucene.lexicon.reader.LexiconReader;
+import org.apache.lucene.lexicon.reader.SimpleReader;
+import org.apache.lucene.lexicon.reader.Word;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ *
+ * @author Mathieu Lecarme
+ */
+public class TestLexicon extends TestCase {
+ // Sample vocabulary; fillExamples() feeds it to a lexicon through a
+ // LowerCaseFilter.
+ private String[] words = {
+   "Lucene", "Apache", "lexicon"
+ };
+
+ /** Creates an empty lexicon backed by a RAMDirectory. */
+ private Lexicon buildLexicon() throws IOException {
+   return new Lexicon(new RAMDirectory());
+ }
+
+ private void fillExamples(Lexicon lexicon) throws IOException {
+   LexiconReader source = new SimpleReader(words);
+   source.setTokenFilter(new LowerCaseFilter(source.getStream()));
+   lexicon.read(source);
+ }
+
+ public void testAphone() throws IOException {
+   Lexicon lexicon = buildLexicon();
+   AphoneAnalyser phonetic = new AphoneAnalyser(new AphoneEn());
+   lexicon.addAnalyser(phonetic);
+   fillExamples(lexicon);
+   StringReader text = new StringReader("Lussene is an appache project");
+   Tokenizer tokenizer = new WhitespaceTokenizer(text);
+   SimilarAble[] similars = new SimilarAble[] { phonetic };
+   SimilarTokenFilter filter = new SimilarTokenFilter(tokenizer, lexicon, null, similars);
+   // Smoke test only: print every token the filter emits.
+   for (Token token = filter.next(); token != null; token = filter.next()) {
+     System.out.println(token.termText());
+   }
+ }
+
+ public void testStemmer() throws IOException {
+   Lexicon lexicon = buildLexicon();
+   StemmerAnalyzer stemmer = new StemmerAnalyzer("French");
+   String[] frenchWords = { "cheval", "bataille", "carotte" };
+   LexiconReader source = new SimpleReader(frenchWords);
+   source.setTokenFilter(new LowerCaseFilter(source.getStream()));
+   lexicon.addAnalyser(stemmer);
+   lexicon.read(source);
+
+   // "chevaux" is expected to match exactly one indexed word ...
+   assertEquals(1, lexicon.findSimilar("chevaux").length);
+   // ... while "bateaux" and "carote" are expected to match none.
+   assertEquals(0, lexicon.findSimilar("bateaux").length);
+   assertEquals(0, lexicon.findSimilar("carote").length);
+ }
+
+ public void testSimilar() throws IOException {
+   Lexicon lexicon = buildLexicon();
+   AphoneAnalyser phonetic = new AphoneAnalyser(new AphoneEn());
+   lexicon.addAnalyser(phonetic);
+   fillExamples(lexicon);
+   SimilarAble[] only = new SimilarAble[] { phonetic };
+   // No field restriction.
+   assertEquals(1, lexicon.findSimilar("appache", null, only).length);
+   // Restricted to the field "foo": no match expected.
+   assertEquals(0, lexicon.findSimilar("appache", new String[] { "foo" }, only).length);
+   // Restricted to the field Word.SIMPLE.
+   assertEquals(1, lexicon.findSimilar("appache", new String[] { Word.SIMPLE }, only).length);
+   // Null analyzers: the lexicon picks its registered ones.
+   assertEquals(1, lexicon.findSimilar("appache", null, null).length);
+   assertEquals(0, lexicon.findSimilar("toto", null, only).length);
+ }
+
+ public void testNear() throws IOException {
+   Lexicon lexicon = buildLexicon();
+   NGramAnalyzer ngram = new NGramAnalyzer();
+   lexicon.addAnalyser(ngram);
+   fillExamples(lexicon);
+   Suggestions suggestions = lexicon.findNear("apoche");
+   // Exactly one suggestion is expected, spelled in lower case.
+   assertEquals(1, suggestions.size());
+   assertEquals("apache", suggestions.getWordIterator().next());
+ }
+
+ public void testDirectory() throws IOException {
+   Lexicon lexicon = new Lexicon(new RAMDirectory());
+   lexicon.addAnalyser(new NGramAnalyzer());
+   // Feed the lexicon with the terms of the sample index.
+   lexicon.read(new DirectoryReader(TestDirectoryReader.buildDirectory()));
+   assertEquals(9, lexicon.getReader().numDocs());
+   assertEquals("lazy", lexicon.findNear("layz").getWordIterator().next());
+ }
+}
Index: contrib/lexicon/src/test/org/apache/lucene/lexicon/analyzer/TestAnalyzer.java
===================================================================
--- contrib/lexicon/src/test/org/apache/lucene/lexicon/analyzer/TestAnalyzer.java (revision 0)
+++ contrib/lexicon/src/test/org/apache/lucene/lexicon/analyzer/TestAnalyzer.java (revision 0)
@@ -0,0 +1,56 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.analyzer;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.aphone.Aphone;
+import org.apache.lucene.aphone.AphoneFr;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+
+/**
+ * @author mlecarme
+ *
+ */
+public class TestAnalyzer extends TestCase {
+ /** Smoke test: asks the analyzer for the fields of a sample term. */
+ private void runLexiconAnalyzer(LexiconAnalyzer analyzer) {
+   Field[] fields = analyzer.getFields(new Term("test", "lucene"));
+   for (int i = 0; i < fields.length; i++) { } // nothing asserted per field
+ }
+
+ /** Asserts that every candidate simplifies to the same form as word. */
+ private void runSimilar(String word, SimilarAble analyzer, String[] similars) {
+   for (int i = 0; i < similars.length; i++) {
+     assertEquals(analyzer.simplify(similars[i]), analyzer.simplify(word));
+   }
+ }
+
+ public void testNgram() {
+   runLexiconAnalyzer(new NGramAnalyzer());
+ }
+
+ public void testFrenchAphone() {
+   Aphone french = new AphoneFr();
+   AphoneAnalyser phonetic = new AphoneAnalyser(french);
+   runLexiconAnalyzer(phonetic);
+   String[] variants = { "lucenes", "lussene", "lhucene" };
+   runSimilar("lucene", phonetic, variants);
+ }
+
+ public void testStem() {
+   StemmerAnalyzer stemmer = new StemmerAnalyzer("French");
+   runLexiconAnalyzer(stemmer);
+   String[] variants = { "lucenne", "lucenes" };
+   runSimilar("lucene", stemmer, variants);
+ }
+
+ public void testAnagram() {
+   AnagramAnalyzer anagram = new AnagramAnalyzer();
+   runLexiconAnalyzer(anagram);
+   String[] variants = { "lucenne", "nucle", "lucne" };
+   runSimilar("lucene", anagram, variants);
+ }
+}
Index: contrib/lexicon/src/test/org/apache/lucene/lexicon/TestQueryUtils.java
===================================================================
--- contrib/lexicon/src/test/org/apache/lucene/lexicon/TestQueryUtils.java (revision 0)
+++ contrib/lexicon/src/test/org/apache/lucene/lexicon/TestQueryUtils.java (revision 0)
@@ -0,0 +1,44 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * @author Mathieu Lecarme
+ *
+ */
+public class TestQueryUtils extends TestCase {
+ public void testExtractTerms() throws ParseException {
+   Query query = new QueryParser("txt", new WhitespaceAnalyzer())
+       .parse("lorem k:ipsum OR (dolores +nic) consequat*");
+   Set terms = QueryUtils.extractTerms(query);
+   assertTrue(terms.remove(new Term("txt", "lorem")));
+   assertTrue(terms.remove(new Term("k", "ipsum")));
+   assertTrue(terms.remove(new Term("txt", "dolores")));
+   assertTrue(terms.remove(new Term("txt", "nic")));
+   // "consequat*" must not be reported as a term, so nothing is left over.
+   assertFalse(terms.remove(new Term("txt", "consequat")));
+   assertTrue(terms.isEmpty());
+ }
+
+ public void testFilter() throws ParseException {
+   AbstractTextTermQueryFilter upperCaser = new AbstractTextTermQueryFilter() {
+     public String filter(String txt) {
+       return txt.toUpperCase();
+     }
+   };
+   Query query = new QueryParser("txt", new WhitespaceAnalyzer())
+       .parse("lorem k:ipsum OR (dolores +nic) consequat*");
+   System.out.println(QueryUtils.filter(query, upperCaser).toString());
+ }
+}
Index: contrib/lexicon/src/test/org/apache/lucene/lexicon/TestDirectoryReader.java
===================================================================
--- contrib/lexicon/src/test/org/apache/lucene/lexicon/TestDirectoryReader.java (revision 0)
+++ contrib/lexicon/src/test/org/apache/lucene/lexicon/TestDirectoryReader.java (revision 0)
@@ -0,0 +1,50 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.lexicon.reader.DirectoryReader;
+import org.apache.lucene.lexicon.reader.LexiconReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * @author Mathieu Lecarme
+ *
+ */
+public class TestDirectoryReader extends TestCase {
+ /** Builds an in-memory index analyzed with StopAnalyzer, used as test input. */
+ protected static Directory buildDirectory() throws IOException {
+   Directory directory = new RAMDirectory();
+   IndexWriter writer = new IndexWriter(directory, new StopAnalyzer(), true);
+   Document robert = new Document();
+   robert.add(new Field("name", new StringReader("Robert Dupond")));
+   robert.add(new Field("bio", new StringReader("The quick brown fox jumps over the lazy dog")));
+   writer.addDocument(robert);
+   // NOTE(review): this second document is never handed to addDocument();
+   // the counts asserted by the tests presumably rely on that - confirm.
+   Document john = new Document();
+   john.add(new Field("name", new StringReader("John Doe")));
+   john.add(new Field("bio", new StringReader("The name John Doe is generally used as a placeholder name for a male party in a legal action or legal discussion whose true identity is unknown.")));
+   writer.close();
+   return directory;
+ }
+
+ public void testDirectory() throws IOException {
+   LexiconReader lexiconReader = new DirectoryReader(buildDirectory());
+   int wordCount = 0;
+   while (lexiconReader.next() != null) {
+     wordCount++;
+   }
+   assertEquals(9, wordCount);
+ }
+}
Index: contrib/lexicon/src/test/org/apache/lucene/lexicon/TestDummyStream.java
===================================================================
--- contrib/lexicon/src/test/org/apache/lucene/lexicon/TestDummyStream.java (revision 0)
+++ contrib/lexicon/src/test/org/apache/lucene/lexicon/TestDummyStream.java (revision 0)
@@ -0,0 +1,52 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Token;
+
+/**
+ * @author mlecarme
+ *
+ */
+public class TestDummyStream extends TestCase {
+ /** Drains the stream, checking its tokens against expected, in order. */
+ private void assertDrains(DummyStream stream, List expected) throws IOException {
+   int position = 0;
+   for (Token token = stream.next(); token != null; token = stream.next()) {
+     assertEquals(token.termText(), expected.get(position));
+     position++;
+   }
+ }
+
+ public void testStack() throws IOException {
+   List list = new ArrayList();
+   list.add("Pim");
+   list.add("Pam");
+   list.add("Poum");
+
+   // Words added one at a time must come back in insertion order.
+   DummyStream dummyStream = new DummyStream();
+   for (int i = 0; i < list.size(); i++) {
+     dummyStream.add((String) list.get(i));
+   }
+   assertDrains(dummyStream, list);
+   assertTrue(dummyStream.isEmpty());
+
+   // Same check after a bulk add on the drained stream.
+   dummyStream.addAll(list);
+   assertDrains(dummyStream, list);
+
+   // Both convenience constructors must pre-fill the stream.
+   dummyStream = new DummyStream(list);
+   assertEquals(list.size(), dummyStream.size());
+   dummyStream = new DummyStream("plop");
+   assertEquals(1, dummyStream.size());
+ }
+}
Index: contrib/lexicon/src/test/org/apache/lucene/lexicon/TestSuggestions.java
===================================================================
--- contrib/lexicon/src/test/org/apache/lucene/lexicon/TestSuggestions.java (revision 0)
+++ contrib/lexicon/src/test/org/apache/lucene/lexicon/TestSuggestions.java (revision 0)
@@ -0,0 +1,70 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.util.Arrays;
+import java.util.Set;
+import java.util.TreeSet;
+
+import junit.framework.TestCase;
+
+/**
+ * @author mlecarme
+ *
+ */
+public class TestSuggestions extends TestCase {
+ /** Returns a Suggestions limited to distance 2 and 3 entries. */
+ private static Suggestions limitedSuggestions() {
+   Suggestions suggestions = new Suggestions();
+   suggestions.setDistanceMax(2);
+   suggestions.setSizeMax(3);
+   return suggestions;
+ }
+
+ /** Returns four suggestives with distinct words and scores. */
+ private static Suggestive[] sampleSuggestives() {
+   return new Suggestive[] {
+     new Suggestive("Pim", new Integer(1), new Float(42), new Integer(1)),
+     new Suggestive("Pam", new Integer(1), new Float(43), new Integer(1)),
+     new Suggestive("Poum", new Integer(1), new Float(30), new Integer(1)),
+     new Suggestive("The Captain", new Integer(1), new Float(51), new Integer(1))
+   };
+ }
+
+ public void testDistanceMax() {
+   Suggestions suggestions = limitedSuggestions();
+   suggestions.add(new Suggestive("toto", new Integer(1), null, null));
+   assertEquals(1, suggestions.size());
+   suggestions.add(new Suggestive("toto", new Integer(3), null, null)); // distance 3 > max 2
+   assertEquals(1, suggestions.size());
+ }
+
+ public void testSet() {
+   Set set = new TreeSet();
+   set.addAll(Arrays.asList(sampleSuggestives()));
+   assertEquals(4, set.size());
+ }
+
+ public void testAdd() {
+   Suggestive s1 = new Suggestive("toto", new Integer(1), new Float(42), new Integer(1));
+   Suggestive s2 = new Suggestive("toto2", new Integer(1), new Float(42), new Integer(1));
+   assertNotSame(s1, s2);
+   // assertNotSame on two freshly boxed Integers could never fail; compare the values.
+   assertFalse(s1.hashCode() == s2.hashCode());
+   Set set = new TreeSet();
+   set.add(s1);
+   set.add(s2);
+   assertEquals(2, set.size());
+   Suggestions suggestions = limitedSuggestions();
+   suggestions.add(s1);
+   suggestions.add(s2);
+   assertEquals(2, suggestions.size());
+ }
+
+ public void testSizeMax() {
+   Suggestions suggestions = limitedSuggestions();
+   suggestions.addAll(Arrays.asList(sampleSuggestives()));
+   assertEquals(3, suggestions.size());
+ }
+}
Index: contrib/lexicon/src/test/org/apache/lucene/lexicon/TestSearcher.java
===================================================================
--- contrib/lexicon/src/test/org/apache/lucene/lexicon/TestSearcher.java (revision 0)
+++ contrib/lexicon/src/test/org/apache/lucene/lexicon/TestSearcher.java (revision 0)
@@ -0,0 +1,39 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.lexicon.analyzer.NGramAnalyzer;
+import org.apache.lucene.lexicon.reader.DirectoryReader;
+import org.apache.lucene.lexicon.reader.LexiconReader;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * @author mlecarme
+ *
+ */
+public class TestSearcher extends TestCase {
+ /** Smoke test: prints the query suggested for "bio:brawn". */
+ public void testSuggestedQuery() throws IOException, ParseException {
+   Directory directory = TestDirectoryReader.buildDirectory();
+   // The lexicon is fed from the very index that is searched below.
+   Lexicon lexicon = new Lexicon(new RAMDirectory());
+   lexicon.addAnalyser(new NGramAnalyzer());
+   lexicon.read(new DirectoryReader(directory));
+   Query query = new QueryParser("txt", new WhitespaceAnalyzer()).parse("bio:brawn");
+   IndexSearcher indexSearcher = new IndexSearcher(directory);
+   SuggestiveSearcher searcher = new SuggestiveSearcher(indexSearcher, lexicon);
+   SuggestiveHits hits = searcher.searchWithSuggestions(query);
+   System.out.println(hits.getSuggestedQuery());
+ }
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/AbstractSearcher.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/AbstractSearcher.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/AbstractSearcher.java (revision 0)
@@ -0,0 +1,74 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.HitCollector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.search.Weight;
+
+/**
+ * @author mlecarme
+ *
+ */
+public abstract class AbstractSearcher extends Searcher {
+ /** Searcher every call below is forwarded to. */
+ protected Searcher searcher;
+ /** Lexicon made available to subclasses; unused by the delegating methods. */
+ protected Lexicon lexicon;
+
+ public void close() throws IOException {
+   searcher.close();
+ }
+
+ public Document doc(int i) throws CorruptIndexException, IOException {
+   return searcher.doc(i);
+ }
+
+ public int docFreq(Term term) throws IOException {
+   return searcher.docFreq(term);
+ }
+
+ public Explanation explain(Weight weight, int doc) throws IOException {
+   return searcher.explain(weight, doc);
+ }
+
+ public int maxDoc() throws IOException {
+   return searcher.maxDoc();
+ }
+
+ public Query rewrite(Query query) throws IOException {
+   return searcher.rewrite(query);
+ }
+
+ public void search(Weight weight, Filter filter, HitCollector results) throws IOException {
+   searcher.search(weight, filter, results);
+ }
+
+ // Must go through the wrapped searcher: an unqualified call resolves to
+ // this very method and recurses until the stack overflows.
+ public TopDocs search(Weight weight, Filter filter, int n) throws IOException {
+   return searcher.search(weight, filter, n);
+ }
+
+ public TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) throws IOException {
+   return searcher.search(weight, filter, n, sort);
+ }
+
+ public Document doc(int n, FieldSelector fieldSelector)
+     throws CorruptIndexException, IOException {
+   return searcher.doc(n, fieldSelector);
+ }
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/NotIndexedException.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/NotIndexedException.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/NotIndexedException.java (revision 0)
@@ -0,0 +1,29 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.io.IOException;
+
+import org.apache.lucene.lexicon.analyzer.LexiconAnalyzer;
+
+/**
+ * Thrown when an analyzer is used for a lookup although it was not used during indexing.
+ * @author Mathieu Lecarme
+ *
+ */
+public class NotIndexedException extends IOException {
+ private static final long serialVersionUID = -4347569712585625159L;
+
+ public NotIndexedException() {
+   super();
+ }
+ /** @param s the detail message */
+ public NotIndexedException(String s) {
+   super(s);
+ }
+ /** @param analyzer the offending analyzer, named in the message; null is tolerated */
+ public NotIndexedException(LexiconAnalyzer analyzer) {
+   super(String.valueOf(analyzer));
+ }
+}
\ No newline at end of file
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/Lexicon.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/Lexicon.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/Lexicon.java (revision 0)
@@ -0,0 +1,322 @@
+package org.apache.lucene.lexicon;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.lexicon.analyzer.Distanceable;
+import org.apache.lucene.lexicon.analyzer.LexiconAnalyzer;
+import org.apache.lucene.lexicon.analyzer.NearAble;
+import org.apache.lucene.lexicon.analyzer.SimilarAble;
+import org.apache.lucene.lexicon.reader.LexiconReader;
+import org.apache.lucene.lexicon.reader.Word;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+
+/**
+ * A list of words.
+ *
+ * [TODO] check that a word still exists in its source index => lazy delete
+ * [TODO] index the number of occurrences
+ * [TODO] update the index (new words, number of occurrences)
+ *
+ *
+ * A Lexicon uses a Lucene Directory.
+ *
+ * Lexicon lexicon = new Lexicon(new RAMDirectory());
+ *
+ * A Lexicon can be fed from a Lucene index or, more simply, from a list of words or a plain text file.
+ *
+ * String[] words = new String[]{"Lucene", "Apache", "lexicon"};
+ * LexiconReader reader = new SimpleReader(words);
+ *
+ * Lucene TokenFilter can be used.
+ *
+ * reader.setTokenFilter(new LowerCaseFilter(reader.getStream()));
+ * lexicon.read(reader);
+ *
+ * @author Mathieu Lecarme
+ * @see <a href="http://en.wikipedia.org/wiki/Lexicon">Lexicon (Wikipedia)</a>
+ */
+public class Lexicon {
+ public static final String WORD = "word"; // index field storing the word text
+ public static final String FIELD = "field"; // index field storing the word's source field name
+ public static final String POPULARITY = "pop"; // index field storing the word popularity
+ private Integer batchSize = new Integer(1000); // only reachable through its getter/setter here
+ private IndexReader reader; // cached reader; dropped by read(), reopened by getReader()
+ private IndexSearcher searcher = null;
+ private Directory spellIndex; // directory holding the lexicon index
+ private LinkedHashSet analysers = new LinkedHashSet(); // LexiconAnalyzer instances, in registration order
+
+ /**
+ * Build a Lexicon with a Directory
+ * @param spellIndex
+ * @throws IOException
+ * @throws LockObtainFailedException
+ * @throws CorruptIndexException
+ */
+ public Lexicon(Directory spellIndex) throws CorruptIndexException, LockObtainFailedException, IOException {
+   this.spellIndex = spellIndex;
+   if (!IndexReader.indexExists(spellIndex)) {
+     // Create an empty index first; the searcher is opened right after.
+     IndexWriter writer = new IndexWriter(spellIndex, null, true);
+     writer.close();
+   }
+   // No previous searcher can exist here: the field is still null while
+   // the constructor runs, so there is nothing to close beforehand.
+   IndexSearcher opened = new IndexSearcher(this.spellIndex);
+   this.searcher = opened;
+   this.reader = opened.getIndexReader();
+ }
+
+ /** Returns the searcher currently open on the lexicon index. */
+ public IndexSearcher getSearcher() {
+   return this.searcher;
+ }
+ /** Returns the cached index reader, opening one on the lexicon directory if needed. */
+ public IndexReader getReader() throws CorruptIndexException, IOException {
+   if (reader != null) { return reader; }
+   reader = IndexReader.open(spellIndex);
+   return reader;
+ }
+
+ /**
+ * Read a LexiconReader to add some Word
+ * @param lexiconReader
+ * @throws IOException
+ */
+ public void read(LexiconReader lexiconReader) throws IOException {
+   // Forcibly clears any lock left on the index directory before writing.
+   IndexReader.unlock(spellIndex);
+   IndexWriter writer = new IndexWriter(spellIndex, new WhitespaceAnalyzer(),
+       !IndexReader.indexExists(spellIndex));
+   try {
+     writer.setMergeFactor(300);
+     writer.setMaxBufferedDocs(150);
+     for (Word word = lexiconReader.filteredNext(); word != null;
+         word = lexiconReader.filteredNext()) {
+       // [FIXME] reuse the existing document of a word to stack its fields
+       Document document = new Document();
+       document.add(new Field(WORD, word.getText(), Store.YES, Index.UN_TOKENIZED));
+       document.add(new Field(POPULARITY, word.getPopularity().toString(), Store.YES, Index.UN_TOKENIZED));
+       if (word.getField() != null) {
+         document.add(new Field(FIELD, word.getField(), Store.NO, Index.UN_TOKENIZED));
+       }
+       // Let every registered analyzer contribute its own fields.
+       for (Iterator it = analysers.iterator(); it.hasNext();) {
+         Field[] fields = ((LexiconAnalyzer) it.next()).getFields(word.getTerm());
+         for (int i = 0; i < fields.length; i++) {
+           document.add(fields[i]);
+         }
+       }
+       writer.addDocument(document);
+     }
+     writer.optimize();
+   } finally {
+     // Always release the writer, even when reading or indexing fails.
+     writer.close();
+   }
+   // Drop the cached reader; getReader() reopens it and sees the new words.
+   if (reader != null) {
+     reader.close();
+     reader = null;
+   }
+   // Reopen the searcher so the next lookup sees the new words as well.
+   searcher.close();
+   searcher = new IndexSearcher(this.spellIndex);
+ }
+
+ /**
+ * Remove a term
+ * @param term
+ */
+ public void remove(Term term){
+ // Not implemented: the body is empty and the term is ignored.
+ }
+
+ /**
+ * end batch remove
+ */
+ public void flush(){
+ // Not implemented: the body is empty.
+ }
+ /**
+ * Add an analyser for parsing indexed terms
+ * @param analyser
+ */
+ public void addAnalyser(LexiconAnalyzer analyser) {
+ analysers.add(analyser); // a LinkedHashSet: an analyser equal to a registered one is not added again
+ }
+
+ /**
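+ * The smallest distance between a word and the word stored in a document.
+ * The Levenshtein distance and every registered Distanceable implementation are tried; the smallest result is kept.
+ * @param word the word to compare
+ * @param document the indexed document to compare it with
+ * @return the smallest distance, or null when word or document is null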
+ */
+ protected Integer distance(String word, Document document) {
+   if (word == null || document == null) { return null; }
+   String indexed = document.get(WORD);
+   if (word.equals(indexed)) {
+     return new Integer(0);
+   }
+   // Baseline is the plain edit distance; Distanceable analyzers may lower it.
+   int best = StringUtils.getLevenshteinDistance(word, indexed);
+   for (Iterator it = analysers.iterator(); it.hasNext();) {
+     LexiconAnalyzer analyzer = (LexiconAnalyzer) it.next();
+     if (analyzer instanceof Distanceable) {
+       best = Math.min(best, ((Distanceable) analyzer).distance(word, document));
+     }
+   }
+   return new Integer(best);
+ }
+
+ /**
+ * Check whether the word exists in the index.
+ * @param word
+ * @throws IOException
+ * @return true if the word exists in the index
+ */
+ public boolean exist(String word) throws IOException {
+ return getReader().docFreq(new Term(WORD, word)) > 0; // exact lookup in the WORD field
+ }
+
+ public void empty(){ // not implemented: the body is empty
+ }
+
+ /**
+ * Find similar words
+ * @param word
+ * @param fields
+ * @param similars
+ * @return
+ * @throws IOException
+ */
+ public String[] findSimilar(String word, String[] fields, SimilarAble[] similars) throws IOException {
+   BooleanQuery query = new BooleanQuery();
+   for (int i = 0; fields != null && i < fields.length; i++) {
+     query.add(new BooleanClause(new TermQuery(new Term(FIELD, fields[i])), Occur.MUST));
+   }
+   if (similars == null) {
+     // Default: registered SimilarAble analyzers only (filtered copy, no null slots).
+     LinkedHashSet found = new LinkedHashSet(analysers);
+     for (Iterator it = found.iterator(); it.hasNext();) {
+       if (!(it.next() instanceof SimilarAble)) {
+         it.remove();
+       }
+     }
+     similars = (SimilarAble[]) found.toArray(new SimilarAble[found.size()]);
+   }
+   for (int i = 0; i < similars.length; i++) {
+     if (!analysers.contains(similars[i])) {
+       throw new NotIndexedException((LexiconAnalyzer) similars[i]);
+     }
+     query.add(new BooleanClause(similars[i].findSimilar(word), Occur.MUST));
+   }
+   Hits hits = getSearcher().search(query);
+   String[] words = new String[hits.length()];
+   for (int i = 0; i < words.length; i++) {
+     words[i] = hits.doc(i).get(WORD);
+   }
+   return words;
+ }
+
+ public String[] findSimilar(String word) throws IOException {
+ return findSimilar(word, null, null); // null, null: no field filter, analyzers taken from the registered ones
+ }
+
+ /**
+ * Find near words from a misspelled one.
+ * @param word
+ * @param fields
+ * @param nears
+ * @return
+ * @throws IOException
+ */
+ public Suggestions findNear(String word, String[] fields, NearAble[] nears) throws IOException {
+   BooleanQuery query = new BooleanQuery();
+   for (int i = 0; fields != null && i < fields.length; i++) {
+     query.add(new BooleanClause(new TermQuery(new Term(FIELD, fields[i])), Occur.MUST));
+   }
+   if (nears == null) {
+     // Default to the registered analyzers that are NearAble. Filtering a copy
+     // avoids the null slots left by an array sized on all analyzers.
+     LinkedHashSet found = new LinkedHashSet(analysers);
+     for (Iterator it = found.iterator(); it.hasNext();) {
+       if (!(it.next() instanceof NearAble)) {
+         it.remove();
+       }
+     }
+     nears = (NearAble[]) found.toArray(new NearAble[found.size()]);
+   }
+   for (int i = 0; i < nears.length; i++) {
+     if (!analysers.contains(nears[i])) {
+       throw new NotIndexedException((LexiconAnalyzer) nears[i]);
+     }
+     query.add(new BooleanClause(nears[i].findNear(word), Occur.MUST));
+   }
+   Hits hits = getSearcher().search(query);
+   Suggestions set = new Suggestions();
+   for (int i = 0; i < hits.length(); i++) {
+     Document doc = hits.doc(i);
+     String suggestedWord = doc.get(WORD);
+     set.add(new Suggestive(suggestedWord, distance(word, doc), new Float(hits.score(i)), new Integer(Integer.parseInt(doc.get(POPULARITY)))));
+   }
+   return set;
+ }
+
+ public Suggestions findNear(String word) throws IOException {
+ return findNear(word, null, null);
+ }
+ /** Convenience overload: same as findNear(term, null). */
+ public Suggestions findNear(Term term) throws IOException {
+ return findNear(term, null);
+ }
+ /** Looks up the term's text, restricted to words indexed with the term's field name. */
+ public Suggestions findNear(Term term, NearAble[] nears) throws IOException {
+ return findNear(term.text(), new String[]{ term.field() }, nears);
+ }
+
+ public Integer getBatchSize() {
+ return batchSize;
+ }
+ // NOTE(review): batchSize is stored but not read anywhere else in this class.
+ public void setBatchSize(Integer batchSize) {
+ this.batchSize = batchSize;
+ }
+
+}
\ No newline at end of file
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/LexiconReader.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/LexiconReader.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/LexiconReader.java (revision 0)
@@ -0,0 +1,43 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.reader;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.lexicon.DummyStream;
+
+/**
+ * @author mlecarme
+ *
+ */
+public abstract class LexiconReader {
+ protected TokenFilter tokenFilter = null;
+ private DummyStream stream = new DummyStream();
+ private Word currentWord;
+ /** Returns the next raw word; filteredNext() treats null as the end of input. */
+ public abstract Word next() throws IOException;
+
+ public void setTokenFilter(TokenFilter tokenFilter) {
+   this.tokenFilter = tokenFilter;
+ }
+ /** Returns the stream each raw word is pushed to before the filter is read. */
+ public TokenStream getStream() throws IOException {
+   return stream;
+ }
+
+ public Word filteredNext() throws IOException {
+   if (tokenFilter == null) { return this.next(); }
+   if (stream.isEmpty()) {
+     currentWord = this.next();
+     if (currentWord == null) {
+       return null;
+     }
+     stream.add(currentWord.getText());
+   }
+   // NOTE(review): assumes the filter emits a token here; a null would cause an NPE.
+   return new Word(currentWord.getField(), tokenFilter.next().termText(), currentWord.getPopularity());
+ }
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/DirectoryReader.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/DirectoryReader.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/DirectoryReader.java (revision 0)
@@ -0,0 +1,84 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.reader;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+
+/**
+ * Lucene Directory reader to feed a Lexicon: enumerates the terms of an index,
+ * optionally restricted to some fields and to a minimal document frequency.
+ * [TODO] synchronization during a reindexation
+ * @author mlecarme
+ */
+public class DirectoryReader extends LexiconReader{
+  private IndexReader reader;
+  private IndexSearcher searcher;
+  private Directory spellIndex;
+  private TermEnum terms;
+  private int thresold = 0;
+  private Set fieldNames;
+
+  /** Sets the minimal document frequency a term needs to be returned by next(). */
+  public void setThresold(int thresold) {
+    this.thresold = thresold;
+  }
+
+  /** Restricts next() to the terms of these fields; a null array changes nothing. */
+  public void setFieldNames(String[] fieldNames) {
+    if(fieldNames != null)
+      this.fieldNames = new HashSet(Arrays.asList(fieldNames));
+  }
+
+  private void init(Directory spellIndex, String[] fieldNames) throws IOException, LockObtainFailedException {
+    setFieldNames(fieldNames);
+    this.spellIndex = spellIndex;
+    if (!IndexReader.indexExists(spellIndex)) {
+      IndexWriter writer = new IndexWriter(spellIndex, null, true);
+      writer.close();
+    }
+    // close the old searcher, if there was one
+    if (searcher != null) {
+      this.searcher.close();
+    }
+    this.searcher = new IndexSearcher(this.spellIndex);
+    this.reader = this.searcher.getIndexReader();
+    this.terms = this.reader.terms();
+  }
+
+  public DirectoryReader(Directory spellIndex) throws IOException, LockObtainFailedException {
+    init(spellIndex, null);
+  }
+
+  public DirectoryReader(Directory spellIndex, String[] fieldNames) throws IOException {
+    init(spellIndex, fieldNames);
+  }
+
+  /** @return the next accepted term, or null at the end; a loop, so that skipped terms cost no stack frame */
+  public Word next() throws IOException {
+    while(this.terms.next()) {
+      Term term = terms.term();
+      int docFreq = reader.docFreq(term);
+      if(docFreq >= thresold && (fieldNames == null || fieldNames.contains(term.field())))
+        return new Word(term, new Integer(docFreq));
+    }
+    return null;
+  }
+
+  /** Starts a new enumeration of the terms, after closing the previous one. */
+  public void seek() throws IOException {
+    this.terms.close();
+    this.terms = this.reader.terms();
+  }
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/Word.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/Word.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/Word.java (revision 0)
@@ -0,0 +1,70 @@
+package org.apache.lucene.lexicon.reader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Term;
+
+/**
+ * A Word for Lexicon Entry: a text in a field, with a popularity.
+ * Unless given, the field is SIMPLE and the popularity is 1.
+ * @author Mathieu Lecarme
+ */
+public class Word {
+  public static final String SIMPLE = "simple";
+  private String field = SIMPLE;
+  private String text;
+  private Integer popularity = new Integer(1);
+
+  public Word(){}
+
+  public Word(Term term, Integer occurency) {
+    text = term.text();
+    field = term.field();
+    this.popularity = occurency;
+  }
+
+  public Word(String field, String text, Integer occurency) {
+    this.field = field;
+    this.text = text;
+    this.popularity = occurency;
+  }
+
+  public Word(String text) {
+    this.text = text;
+  }
+
+  public String getField() {
+    return field;
+  }
+
+  public String getText() {
+    return text;
+  }
+
+  public Integer getPopularity() {
+    return popularity;
+  }
+
+  public Term getTerm() {
+    return new Term(getField(), getText());
+  }
+
+  public String toString() { // e.g. <Word simple:lucene (1)>
+    return "<Word " + field + ":" + text + " (" + popularity + ")>";
+  }
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/SimpleReader.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/SimpleReader.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/reader/SimpleReader.java (revision 0)
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.reader;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * LexiconReader over a sequence of Strings; each one becomes a Word of popularity 1.
+ * @author Mathieu Lecarme
+ */
+public class SimpleReader extends LexiconReader {
+  private Iterator iterator;
+  private String field = Word.SIMPLE; // one definition of the default field name, shared with Word
+
+  public SimpleReader(Iterator iterator) {
+    this.iterator = iterator;
+  }
+
+  public SimpleReader(Collection collection) {
+    this.iterator = collection.iterator();
+  }
+
+  public SimpleReader(String[] strings) {
+    this.iterator = Arrays.asList(strings).iterator();
+  }
+
+  /** @return a Word in the current field for the next String, or null when there is none left */
+  public Word next() throws IOException {
+    if(! iterator.hasNext())
+      return null;
+    return new Word(field, (String)iterator.next(), new Integer(1));
+  }
+
+  public String getField() {
+    return field;
+  }
+
+  public void setField(String field) {
+    this.field = field;
+  }
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/SuggestiveSearcher.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/SuggestiveSearcher.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/SuggestiveSearcher.java (revision 0)
@@ -0,0 +1,75 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.Sort;
+
+/**
+ * Searcher that joins suggestions from a Lexicon to the hits of the queries with few results.
+ * @author Mathieu Lecarme
+ */
+public class SuggestiveSearcher extends AbstractSearcher{
+  private int thresold = 10;
+
+  /**
+   * @param searcher the searcher the queries are run against
+   * @param lexicon the lexicon asked for the words near the terms of a query
+   */
+  public SuggestiveSearcher(Searcher searcher, Lexicon lexicon) {
+    super();
+    this.searcher = searcher;
+    this.lexicon = lexicon;
+  }
+
+  /** @return the number of hits above which no suggestion is looked for */
+  public int getThresold() {
+    return thresold;
+  }
+
+  /** @param thresold the number of hits above which no suggestion is looked for */
+  public void setThresold(int thresold) {
+    this.thresold = thresold;
+  }
+
+  public SuggestiveHits searchWithSuggestions(Query query) throws IOException {
+    return searchWithSuggestions(query, null, null);
+  }
+
+  public SuggestiveHits searchWithSuggestions(Query query, Filter filter) throws IOException {
+    return searchWithSuggestions(query, filter, null);
+  }
+
+  public SuggestiveHits searchWithSuggestions(Query query, Sort sort) throws IOException {
+    return searchWithSuggestions(query, null, sort);
+  }
+
+  /**
+   * Runs the query. When it gets more than thresold hits, or when extractTerms() gives
+   * no set of terms for it, the hits come without suggestion. Otherwise the words the
+   * lexicon finds near each term of the query are joined, keyed by the text of the term.
+   */
+  public SuggestiveHits searchWithSuggestions(Query query, Filter filter, Sort sort) throws IOException {
+    Hits hits = this.searcher.search(query, filter, sort);
+    Set terms = (hits.length() > thresold) ? null : QueryUtils.extractTerms(query);
+    if(terms == null)
+      return new SuggestiveHits(hits, null, null);
+    Map suggestions = new HashMap();
+    for(Iterator it = terms.iterator(); it.hasNext();) {
+      Term term = (Term)it.next();
+      suggestions.put(term.text(), lexicon.findNear(term));
+    }
+    return new SuggestiveHits(hits, suggestions, query);
+  }
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/TermQueryFilter.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/TermQueryFilter.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/TermQueryFilter.java (revision 0)
@@ -0,0 +1,15 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+/**
+ * Callback giving the Query to use in place of a TermQuery (see QueryUtils.filter).
+ * @author Mathieu Lecarme
+ */
+public interface TermQueryFilter {
+ public Query filter(TermQuery term);
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/StemmerAnalyzer.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/StemmerAnalyzer.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/StemmerAnalyzer.java (revision 0)
@@ -0,0 +1,94 @@
+package org.apache.lucene.lexicon.analyzer;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+import net.sf.snowball.SnowballProgram;
+
+/**
+ * Find words with same stemming: a word is simplified to the stem a Snowball stemmer gives it.
+ * NOTE(review): simplify() goes through the state of one shared stemmer instance -- presumably not safe for concurrent calls, confirm.
+ * @see net.sf.snowball.SnowballProgram
+ * @author Mathieu Lecarme
+ */
+public class StemmerAnalyzer extends AbstractSimplifierAnalyzer{
+  private static final Object [] EMPTY_ARGS = new Object[0];
+  public static final String KEY = "stemmer.";
+  public static final String STEM = KEY + "stem";
+  private SnowballProgram stemmer;
+  private Method stemMethod;
+
+  /** @param name the stemmer to use: the class net.sf.snowball.ext.[name]Stemmer is instantiated */
+  public StemmerAnalyzer(String name) {
+    try {
+      Class stemClass = Class.forName("net.sf.snowball.ext." + name + "Stemmer");
+      stemmer = (SnowballProgram) stemClass.newInstance();
+      // why doesn't the SnowballProgram class have an (abstract?) stem method?
+      stemMethod = stemClass.getMethod("stem", new Class[0]);
+    } catch (Exception e) {
+      throw new RuntimeException(e); // same message as e.toString(), with the cause kept
+    }
+  }
+
+  /**
+   * @param word the word to stem
+   * @return stemmed word
+   */
+  public String simplify(String word){
+    stemmer.setCurrent(word);
+    try {
+      stemMethod.invoke(stemmer, EMPTY_ARGS);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+    return stemmer.getCurrent();
+  }
+
+  public String getKey() {
+    return KEY;
+  }
+
+  public String getSimpleKey() {
+    return STEM;
+  }
+
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + ((stemmer == null) ? 0 : stemmer.getClass().hashCode());
+    return result;
+  }
+
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    final StemmerAnalyzer other = (StemmerAnalyzer) obj;
+    if (stemmer == null) {
+      if (other.stemmer != null)
+        return false;
+    } else if (other.stemmer == null || !stemmer.getClass().equals(other.stemmer.getClass()))
+      return false;
+    return true;
+  }
+
+}
\ No newline at end of file
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/SimilarAble.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/SimilarAble.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/SimilarAble.java (revision 0)
@@ -0,0 +1,16 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.analyzer;
+
+import org.apache.lucene.search.Query;
+
+/**
+ * Can find similar tokens: gives the simplified form of a word
+ * and a Query for a word (the implementations in this package query on that simplified form).
+ * @author mlecarme
+ */
+public interface SimilarAble {
+ public Query findSimilar(String word);
+ public String simplify(String word);
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/LexiconAnalyzer.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/LexiconAnalyzer.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/LexiconAnalyzer.java (revision 0)
@@ -0,0 +1,40 @@
+package org.apache.lucene.lexicon.analyzer;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+
+/**
+ * A builder for lexicon entries: turns a Term into the Fields to index for it.
+ * @author Mathieu Lecarme
+ *
+ */
+public interface LexiconAnalyzer {
+ /**
+ * For each indexed Term, any number of Fields can be returned.
+ * @param term the term to build Fields for
+ * @return the Fields built for the term
+ */
+ public Field[] getFields(Term term);
+
+ /**
+ * @return the distinct key for the Field.
+ */
+ public String getKey();
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/WordNetAnalyzer.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/WordNetAnalyzer.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/WordNetAnalyzer.java (revision 0)
@@ -0,0 +1,36 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.analyzer;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+
+/**
+ * Placeholder: none of the methods is implemented yet, each of them returns null.
+ * @author mlecarme
+ */
+public class WordNetAnalyzer implements LexiconAnalyzer, SimilarAble{
+
+ public Field[] getFields(Term term) {
+ // TODO not implemented yet
+ return null;
+ }
+
+ public String getKey() {
+ // TODO not implemented yet
+ return null;
+ }
+
+ public Query findSimilar(String word) {
+ // TODO not implemented yet
+ return null;
+ }
+
+ public String simplify(String word) {
+ // TODO not implemented yet
+ return null;
+ }
+
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AphoneAnalyser.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AphoneAnalyser.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AphoneAnalyser.java (revision 0)
@@ -0,0 +1,100 @@
+package org.apache.lucene.lexicon.analyzer;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.lucene.aphone.Aphone;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+/**
+ * Lexicon analyzer working on the phonetic form of the words, as computed by an Aphone.
+ * @author Mathieu Lecarme
+ */
+public class AphoneAnalyser extends NGramAnalyzer implements Distanceable, SimilarAble {
+  private Aphone aphone;
+  public static final String KEY = "aphone.";
+  public static final String PHONEM = KEY + "phonem";
+
+  /**
+   * @param aphone the phonetic encoder applied to every word
+   */
+  public AphoneAnalyser(Aphone aphone) {
+    this.aphone = aphone;
+  }
+
+  /** @return KEY, used as the prefix of the n-gram field names built by NGramAnalyzer */
+  public String getKey() {
+    return KEY;
+  }
+
+  /**
+   * Builds the n-gram fields of the phonetic form of the term (see addNgram),
+   * plus the phonetic form itself, stored in the PHONEM field.
+   */
+  public Field[] getFields(Term term) {
+    String phoneme = aphone.toPhone(term.text());
+    List fields = new ArrayList();
+    addNgram(phoneme, fields);
+    fields.add(new Field(PHONEM, phoneme, Store.YES, Index.UN_TOKENIZED));
+    return (Field[]) fields.toArray(new Field[fields.size()]);
+  }
+
+  /** @return a query on the entries whose PHONEM field holds the phonetic form of the word */
+  public Query findSimilar(String word) {
+    return new TermQuery(new Term(PHONEM, simplify(word)));
+  }
+
+  /** @return the phonetic form of the word */
+  public String simplify(String word) {
+    return aphone.toPhone(word);
+  }
+
+  /** @return the Levenshtein distance between the phonetic form of the word and the PHONEM value of the document */
+  public int distance(String word, Document doc) {
+    String phoneme = aphone.toPhone(word);
+    return StringUtils.getLevenshteinDistance(phoneme, doc.get(PHONEM));
+  }
+
+  /** Depends only on the class of the Aphone, like equals(). */
+  public int hashCode() {
+    final int prime = 31;
+    return prime + ((aphone == null) ? 0 : aphone.getClass().hashCode());
+  }
+
+  /** Equal to another AphoneAnalyser whose Aphone is of the same class. */
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null || getClass() != obj.getClass())
+      return false;
+    final AphoneAnalyser other = (AphoneAnalyser) obj;
+    if (aphone == null)
+      return other.aphone == null;
+    return aphone.getClass().equals(other.aphone.getClass());
+  }
+
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/NearAble.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/NearAble.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/NearAble.java (revision 0)
@@ -0,0 +1,15 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.analyzer;
+
+import org.apache.lucene.search.Query;
+
+/**
+ * Can be used for a "do you mean" suggestion: gives the Query looking for the entries near a word.
+ * @author Mathieu Lecarme
+ *
+ */
+public interface NearAble {
+ public Query findNear(String word);
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/NGramAnalyzer.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/NGramAnalyzer.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/NGramAnalyzer.java (revision 0)
@@ -0,0 +1,171 @@
+package org.apache.lucene.lexicon.analyzer;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+
+/**
+ * Indexes a word by its n-grams, its first and last gram and its length, and searches near words on them.
+ * @see org.apache.lucene.analysis.ngram.NGramTokenizer
+ * @author Mathieu Lecarme
+ */
+public class NGramAnalyzer implements LexiconAnalyzer, NearAble {
+  public static final String KEY = "ngram.";
+  public static final String SIZE = "size";
+  public static final String GRAM = "gram";
+  public static final String START = "start";
+  public static final String END = "end";
+  protected int min = 2;
+  protected int max = 2;
+  protected int delta = 1; // delta in size for search
+
+  public NGramAnalyzer() {}
+
+  /**
+   * @param min smallest gram size
+   * @param max largest gram size
+   * @param delta largest difference of length between a word and the words near it
+   */
+  public NGramAnalyzer(int min, int max, int delta) {
+    this.min = min;
+    this.max = max;
+    this.delta = delta;
+  }
+
+  /** @return the prefix of the names of the fields built by this analyzer */
+  public String getKey() {
+    return KEY;
+  }
+
+  /** @return the distinct grams of the word, in the order the tokenizer produces them */
+  protected Set ngram(String word) {
+    Set set = new LinkedHashSet();
+    NGramTokenizer tokenizer = new NGramTokenizer(new StringReader(word), min, max);
+    Token token;
+    for(;;){
+      try {
+        token = tokenizer.next();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      if(token == null)
+        break;
+      set.add(token.termText());
+    }
+    return set;
+  }
+
+  /**
+   * Appends the fields of a word: one GRAM per gram, START and END for the first and
+   * last gram, and SIZE for the length. START and END are left out when there is no gram.
+   */
+  protected void addNgram(String word, List fields) {
+    String currentGram = null;
+    for(Iterator iterator = ngram(word).iterator(); iterator.hasNext();) {
+      String gram = (String)iterator.next();
+      if(currentGram == null)
+        fields.add(new Field(getKey() + START, gram, Store.NO, Index.UN_TOKENIZED));
+      currentGram = gram;
+      fields.add(new Field(getKey() + GRAM, gram, Store.NO, Index.UN_TOKENIZED));
+    }
+    if(currentGram != null) // Field rejects a null value, and there is no last gram without gram
+      fields.add(new Field(getKey() + END, currentGram, Store.NO, Index.UN_TOKENIZED));
+    fields.add(new Field(getKey() + SIZE, Integer.toString(word.length()), Store.NO, Index.UN_TOKENIZED));
+  }
+
+  public Field[] getFields(Term term) {
+    List fields = new ArrayList();
+    addNgram(term.text(), fields);
+    Field[] result = new Field[fields.size()];
+    fields.toArray(result);
+    return result;
+  }
+
+  /**
+   * Query on the entries whose length differs by at most delta from the length of the word
+   * and which have a gram of the word, start with its first gram or end with its last gram.
+   */
+  public Query findNear(String word) {
+    // One exact term per accepted length: a range over the SIZE terms would compare them
+    // as strings ("9" sorts after "11"), which breaks as soon as the number of digits changes.
+    BooleanQuery sizes = new BooleanQuery();
+    for(int size = Math.max(0, word.length() - delta); size <= word.length() + delta; size++)
+      sizes.add(new TermQuery(new Term(getKey() + SIZE, Integer.toString(size))), Occur.SHOULD);
+    sizes.setBoost(0.0f); // the length restricts the matches, it is not meant to rank them
+    BooleanQuery or = new BooleanQuery();
+    String currentGram = null;
+    for(Iterator iterator = ngram(word).iterator(); iterator.hasNext();) {
+      String gram = (String)iterator.next();
+      if(currentGram == null)
+        or.add(new TermQuery(new Term(getKey() + START, gram)), Occur.SHOULD);
+      currentGram = gram;
+      or.add(new TermQuery(new Term(getKey() + GRAM, gram)), Occur.SHOULD);
+    }
+    if(currentGram != null) // without gram there is no END term to look for
+      or.add(new TermQuery(new Term(getKey() + END, currentGram)), Occur.SHOULD);
+    BooleanQuery query = new BooleanQuery();
+    query.add(sizes, Occur.MUST);
+    query.add(or, Occur.MUST);
+    return query;
+  }
+
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + delta;
+    result = prime * result + max;
+    result = prime * result + min;
+    return result;
+  }
+
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    final NGramAnalyzer other = (NGramAnalyzer) obj;
+    if (delta != other.delta)
+      return false;
+    if (max != other.max)
+      return false;
+    if (min != other.min)
+      return false;
+    return true;
+  }
+
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AnagramAnalyzer.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AnagramAnalyzer.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AnagramAnalyzer.java (revision 0)
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.analyzer;
+
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Anagram analyzer.
+ * A word is reduced to its distinct characters, lower-cased and sorted:
+ * "Lucene" becomes "celnu".
+ * @author Mathieu Lecarme
+ * @see <a href="http://en.wikipedia.org/wiki/Anagram">Anagram (Wikipedia)</a>
+ */
+public class AnagramAnalyzer extends AbstractSimplifierAnalyzer {
+  public static final String KEY = "anagram.";
+  public static final String ANAGRAM = KEY + "anagram";
+
+  /**
+   * Puts the lower-cased characters of the word in a sorted set,
+   * which orders them and drops the duplicates, then joins them.
+   * @param word the word to reduce
+   * @return the anagramed word
+   */
+  public String simplify(String word) {
+    SortedSet letters = new TreeSet();
+    for(int i = 0; i < word.length(); i++)
+      letters.add(new Character(Character.toLowerCase(word.charAt(i))));
+    StringBuffer sorted = new StringBuffer(letters.size());
+    for(Iterator it = letters.iterator(); it.hasNext();)
+      sorted.append(it.next());
+    return sorted.toString();
+  }
+
+  public String getKey() {
+    return KEY;
+  }
+
+  public String getSimpleKey() {
+    return ANAGRAM;
+  }
+
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AbstractSimplifierAnalyzer.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AbstractSimplifierAnalyzer.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/AbstractSimplifierAnalyzer.java (revision 0)
@@ -0,0 +1,37 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon.analyzer;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+/**
+ * @author Mathieu Lecarme
+ *
+ */
+public abstract class AbstractSimplifierAnalyzer implements LexiconAnalyzer, SimilarAble, NearAble{
+ public abstract String getSimpleKey();
+ public abstract String simplify(String word);
+
+ public Field[] getFields(Term term) {
+ if(term == null)
+ return null;
+ return new Field[] {
+ new Field(getSimpleKey(), simplify(term.text()), Store.NO, Index.UN_TOKENIZED)};
+ }
+
+ public Query findSimilar(String word) {
+ return new TermQuery(
+ new Term(getSimpleKey(), simplify(word)));
+ }
+
+ public Query findNear(String word) {
+ return findSimilar(word);
+ }
+
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/Distanceable.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/Distanceable.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/analyzer/Distanceable.java (revision 0)
@@ -0,0 +1,12 @@
+package org.apache.lucene.lexicon.analyzer;
+
+import org.apache.lucene.document.Document;
+
+/**
+ * For a LexiconAnalyzer that computes the distance between a word and a suggested entry, given as its Document.
+ * @see LexiconAnalyzer
+ * @author Mathieu Lecarme
+ */
+public interface Distanceable {
+ public int distance(String word, Document doc);
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/Suggestive.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/Suggestive.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/Suggestive.java (revision 0)
@@ -0,0 +1,134 @@
+package org.apache.lucene.lexicon;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * A suggested word, with the distance, score and popularity it is ordered on.
+ * @see Suggestions
+ * @author Mathieu Lecarme
+ */
+public class Suggestive implements Comparable{
+  private String word;
+  private Integer distance;
+  private Float score;
+  private Integer popularity;
+
+  /**
+   * Creates a suggestion from its word and from the three values compareTo() orders on,
+   * in this order: distance, score, popularity.
+   * compareTo() dereferences all four values, so none of them may be null when suggestions are compared.
+   */
+  public Suggestive(String word, Integer distance, Float score, Integer popularity) {
+    super();
+    this.word = word;
+    this.distance = distance;
+    this.score= score;
+    this.popularity = popularity;
+  }
+
+  public void setWord(String word) {
+    this.word = word;
+  }
+
+  public String getWord() {
+    return word;
+  }
+
+  public void setDistance(Integer distance) {
+    this.distance = distance;
+  }
+
+  public Integer getDistance() {
+    return distance;
+  }
+
+  public void setPopularity(Integer popularity) {
+    this.popularity = popularity;
+  }
+
+  public Integer getPopularity() {
+    return popularity;
+  }
+
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result
+      + ((distance == null) ? 0 : distance.hashCode());
+    result = prime * result
+      + ((popularity == null) ? 0 : popularity.hashCode());
+    result = prime * result + ((word == null) ? 0 : word.hashCode());
+    return result;
+  }
+
+  // NOTE(review): score is not part of equals()/hashCode() although compareTo() orders on it,
+  // so two equal suggestions may not compare as 0 -- confirm this is intended.
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null || getClass() != obj.getClass())
+      return false;
+    final Suggestive other = (Suggestive) obj;
+    if (distance == null) {
+      if (other.distance != null)
+        return false;
+    } else if (!distance.equals(other.distance))
+      return false;
+    if (popularity == null) {
+      if (other.popularity != null)
+        return false;
+    } else if (!popularity.equals(other.popularity))
+      return false;
+    if (word == null) {
+      if (other.word != null)
+        return false;
+    } else if (!word.equals(other.word))
+      return false;
+    return true;
+  }
+
+  public int compareTo(Object arg0) {
+    if(arg0 == null)
+      return -1;
+    Suggestive other = (Suggestive)arg0;
+    if(!getDistance().equals(other.getDistance()))
+      return getDistance().compareTo(other.getDistance());
+    if(!getScore().equals(other.getScore()))
+      return getScore().compareTo(other.getScore());
+    if(! getPopularity().equals(other.getPopularity()))
+      return getPopularity().compareTo(other.getPopularity());
+    return getWord().compareTo(other.getWord());
+  }
+
+  /**
+   * @return the score
+   */
+  public Float getScore() {
+    return score;
+  }
+
+  /**
+   * @param score the score to set
+   */
+  public void setScore(Float score) {
+    this.score = score;
+  }
+
+  public String toString() { // e.g. <Suggestive lucene distance:1 score:0.5 popularity:3>
+    return "<Suggestive " + word + " distance:" + distance + " score:" + score + " popularity:" + popularity + ">";
+  }
+}
\ No newline at end of file
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/QueryUtils.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/QueryUtils.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/QueryUtils.java (revision 0)
@@ -0,0 +1,123 @@
+ /**
+  *
+  */
+ package org.apache.lucene.lexicon;
+
+ import java.io.IOException;
+ import java.util.HashSet;
+ import java.util.Iterator;
+ import java.util.Set;
+
+ import org.apache.lucene.index.Term;
+ import org.apache.lucene.search.BooleanClause;
+ import org.apache.lucene.search.BooleanQuery;
+ import org.apache.lucene.search.Query;
+ import org.apache.lucene.search.TermQuery;
+ import org.apache.lucene.search.BooleanClause.Occur;
+
+ /**
+  * Static helpers that inspect or rewrite queries made of {@link TermQuery}
+  * leaves and {@link BooleanQuery} nodes.
+  *
+  * @author Mathieu Lecarme
+  */
+ public final class QueryUtils {
+   /**
+    * Collects the term of every {@link TermQuery} reachable from
+    * {@code query}, descending recursively through the clauses of
+    * {@link BooleanQuery} nodes. Clauses holding any other query type
+    * contribute nothing.
+    *
+    * @param query the query to inspect
+    * @return a new set of {@link Term}, or {@code null} when
+    *         {@code query} is neither a TermQuery nor a BooleanQuery
+    */
+   public static final Set extractTerms(Query query) {
+     Set terms = new HashSet();
+     if (query instanceof TermQuery) {
+       terms.add(((TermQuery) query).getTerm());
+       return terms;
+     }
+     if (query instanceof BooleanQuery) {
+       Iterator iterator = ((BooleanQuery) query).clauses().iterator();
+       while (iterator.hasNext()) {
+         Set subTerms = extractTerms(((BooleanClause) iterator.next()).getQuery());
+         // null marks an unsupported sub-query type: nothing to collect
+         if (subTerms != null) {
+           terms.addAll(subTerms);
+         }
+       }
+       return terms;
+     }
+     return null;
+   }
+
+   /**
+    * Builds a rewritten version of {@code query} in which every
+    * {@link TermQuery} is replaced by the result of {@code filter}.
+    * A BooleanQuery is rebuilt recursively, keeping its boost and the
+    * occurrence flag of each clause; a TermQuery is handed to the filter
+    * directly; any other query type is returned as is.
+    *
+    * The clauses of the source query are left untouched: each rewritten
+    * sub-query goes into a fresh clause of the new BooleanQuery instead of
+    * being written back into the clause owned by {@code query}.
+    *
+    * @param query the query to rewrite
+    * @param filter the transformation applied to each TermQuery
+    * @return the rewritten query
+    */
+   public static final Query filter(Query query, TermQueryFilter filter) {
+     if (query instanceof TermQuery) {
+       return filter.filter((TermQuery) query);
+     }
+     if (!(query instanceof BooleanQuery)) {
+       return query;
+     }
+     BooleanQuery filteredQuery = new BooleanQuery();
+     filteredQuery.setBoost(query.getBoost());
+     Iterator iterator = ((BooleanQuery) query).clauses().iterator();
+     while (iterator.hasNext()) {
+       BooleanClause clause = (BooleanClause) iterator.next();
+       // The sub-query is cloned before filtering, so the filter never
+       // receives the instance held by the source query.
+       Query filtered = QueryUtils.filter((Query) clause.getQuery().clone(), filter);
+       filteredQuery.add(filtered, clause.getOccur());
+     }
+     return filteredQuery;
+   }
+
+   /**
+    * Expands every TermQuery of {@code query} into a BooleanQuery that
+    * requires the original term ({@code MUST}) and optionally matches
+    * ({@code SHOULD}) each word returned by
+    * {@code lexicon.findSimilar} for the term text, in the same field.
+    *
+    * @param query the query to expand
+    * @param lexicon the source of similar words
+    * @param coeff factor multiplied with the original term's boost to get
+    *        the boost of each similar term
+    * @return the expanded query
+    * @throws RuntimeException wrapping an IOException raised by the lexicon
+    */
+   public static Query buildSimilarQuery(Query query, final Lexicon lexicon, final float coeff) {
+     return filter(query, new TermQueryFilter() {
+       public Query filter(TermQuery term) {
+         String[] similars;
+         try {
+           similars = lexicon.findSimilar(term.getTerm().text());
+         } catch (IOException e) {
+           throw new RuntimeException(e);
+         }
+         BooleanQuery similarQuery = new BooleanQuery();
+         similarQuery.add(term, Occur.MUST);
+         for (int a = 0; a < similars.length; a++) {
+           TermQuery tq = new TermQuery(new Term(term.getTerm().field(), similars[a]));
+           tq.setBoost(coeff * term.getBoost());
+           similarQuery.add(tq, Occur.SHOULD);
+         }
+         return similarQuery;
+       }
+     });
+   }
+ }
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/DummyStream.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/DummyStream.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/DummyStream.java (revision 0)
@@ -0,0 +1,85 @@
+ /**
+  *
+  */
+ package org.apache.lucene.lexicon;
+
+ import java.io.IOException;
+ import java.util.ArrayList;
+ import java.util.Collection;
+ import java.util.List;
+
+ import org.apache.lucene.analysis.Token;
+ import org.apache.lucene.analysis.TokenStream;
+
+ /**
+  * A token stream fed from an in-memory list of words rather than a reader.
+  * Useful as the input of a TokenFilter when the text is already split
+  * into Strings.
+  * @see org.apache.lucene.analysis.TokenStream
+  * @author Mathieu Lecarme
+  */
+ public class DummyStream extends TokenStream {
+   /** Words not emitted yet, oldest first. */
+   private List words = new ArrayList();
+
+   /** Creates a stream with no word. */
+   public DummyStream() {}
+
+   /**
+    * Creates a stream holding a single word.
+    * @param word the word to emit
+    */
+   public DummyStream(String word) {
+     super();
+     this.add(word);
+   }
+
+   /**
+    * Creates a stream holding every element of a collection.
+    * @param words the words to emit, in the collection's iteration order
+    */
+   public DummyStream(Collection words) {
+     super();
+     this.addAll(words);
+   }
+
+   /** Queues one more word behind the pending ones. */
+   public void add(String word) {
+     words.add(word);
+   }
+
+   /** Queues every element of {@code words} behind the pending ones. */
+   public void addAll(Collection words) {
+     this.words.addAll(words);
+   }
+
+   /** @return {@code true} when no word is pending */
+   public boolean isEmpty() {
+     return words.isEmpty();
+   }
+
+   /** Drops every pending word. */
+   public void clear() {
+     words.clear();
+   }
+
+   /** @return the number of pending words */
+   public int size() {
+     return words.size();
+   }
+
+   /**
+    * Emits the oldest pending word and removes it from the stream.
+    * Start and end offsets of the token are both 0.
+    * @return the next token, or {@code null} once no word is pending
+    */
+   public Token next() throws IOException {
+     Token token = null;
+     if (!words.isEmpty()) {
+       token = new Token((String) words.get(0), 0, 0);
+       words.remove(0);
+     }
+     return token;
+   }
+
+ }
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/SimilarTokenFilter.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/SimilarTokenFilter.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/SimilarTokenFilter.java (revision 0)
@@ -0,0 +1,94 @@
+ /**
+  *
+  */
+ package org.apache.lucene.lexicon;
+
+ import java.io.IOException;
+ import java.util.Stack;
+
+ import org.apache.lucene.analysis.Token;
+ import org.apache.lucene.analysis.TokenFilter;
+ import org.apache.lucene.analysis.TokenStream;
+ import org.apache.lucene.lexicon.analyzer.SimilarAble;
+
+ /**
+  * Generic token filter which emits, for each input token, the similar
+  * words found in a lexicon.
+  * @author Mathieu Lecarme
+  *
+  */
+ public class SimilarTokenFilter extends TokenFilter {
+   private Lexicon lexicon;
+   /** Texts still to be emitted for the current input token. */
+   private Stack stack = new Stack();
+   private String[] fields = null;
+   private SimilarAble[] similars = null;
+   /** Input token whose offsets are copied onto every emitted token. */
+   private Token currentToken;
+
+   /**
+    * A simple similar token filter; {@code fields} and
+    * {@code similars} stay {@code null}.
+    * @param input a stream
+    * @param lexicon the lexicon queried for similar words
+    */
+   protected SimilarTokenFilter(TokenStream input, Lexicon lexicon) {
+     super(input);
+     this.lexicon = lexicon;
+   }
+
+   /**
+    * A complete similar token filter. Only fields used during the lexicon
+    * construction and Similarable analyzer are used for this similarity search
+    * @param input a stream
+    * @param lexicon the lexicon queried for similar words
+    * @param fields which are used during the indexation
+    * @param similars used during the search.
+    */
+   protected SimilarTokenFilter(TokenStream input, Lexicon lexicon, String[] fields, SimilarAble[] similars) {
+     super(input);
+     this.lexicon = lexicon;
+     this.fields = fields;
+     this.similars = similars;
+   }
+
+   /**
+    * Returns the next pending text as a token carrying the offsets of the
+    * input token it was derived from. When nothing is pending, the next
+    * input token is read and its similar words are queued first.
+    * @return the next token, or {@code null} when the input is exhausted
+    */
+   public Token next() throws IOException {
+     if (stack.isEmpty() && !refill()) {
+       return null;
+     }
+     String text = (String) stack.pop();
+     return new Token(text, currentToken.startOffset(), currentToken.endOffset());
+   }
+
+   /**
+    * Reads one input token and queues the words the lexicon finds similar
+    * to it, plus the token's own text unless the lexicon already returned it.
+    * @return {@code false} when the input has no more token
+    */
+   private boolean refill() throws IOException {
+     currentToken = this.input.next();
+     if (currentToken == null) {
+       return false;
+     }
+     // set to null as soon as the lexicon echoes the original text
+     String original = currentToken.termText();
+     String[] sims = lexicon.findSimilar(original, fields, similars);
+     for (int i = 0; i < sims.length; i++) {
+       stack.push(sims[i]);
+       if (sims[i].equals(original)) {
+         original = null;
+       }
+     }
+     if (original != null) {
+       stack.push(original);
+     }
+     return true;
+   }
+
+ }
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/Suggestions.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/Suggestions.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/Suggestions.java (revision 0)
@@ -0,0 +1,98 @@
+ /**
+  *
+  */
+ package org.apache.lucene.lexicon;
+
+ import java.util.Iterator;
+ import java.util.TreeSet;
+
+ /**
+  * A sorted set of suggestion, with a max size, and a distanceMax
+  * @author Mathieu Lecarme
+  */
+ public class Suggestions extends TreeSet {
+   private static final long serialVersionUID = 4038368090229602687L;
+   /** Suggestions whose distance is above this value are refused. */
+   private int distanceMax = 3;
+   /** Upper bound on the number of suggestions kept after an insertion. */
+   private int sizeMax = 10;
+
+   /**
+    * Adds a {@link Suggestive} unless its distance exceeds
+    * {@code distanceMax}, then evicts the last (greatest) elements until
+    * the set holds at most {@code sizeMax} of them.
+    *
+    * The result follows the {@code Collection.add} contract: it is
+    * {@code true} only when the set really contains the element
+    * afterwards, so an element that is inserted and immediately evicted
+    * because it sorts last yields {@code false}.
+    *
+    * @param arg0 the suggestion to add; must be a {@link Suggestive}
+    * @return {@code true} if the suggestion was added and kept
+    * @throws ClassCastException if {@code arg0} is not a Suggestive
+    */
+   public boolean add(Object arg0) {
+     Suggestive suggestive = (Suggestive) arg0;
+     if (suggestive.getDistance().intValue() > distanceMax) {
+       return false;
+     }
+     if (!super.add(arg0)) {
+       return false;
+     }
+     // A loop rather than a single removal: sizeMax may have been lowered
+     // since the previous insertion.
+     while (!isEmpty() && size() > sizeMax) {
+       remove(last());
+     }
+     return contains(arg0);
+   }
+
+   /**
+    * @return the distanceMax
+    */
+   public int getDistanceMax() {
+     return distanceMax;
+   }
+
+   /**
+    * @param distanceMax the distanceMax to set
+    */
+   public void setDistanceMax(int distanceMax) {
+     this.distanceMax = distanceMax;
+   }
+
+   /**
+    * @return the sizeMax
+    */
+   public int getSizeMax() {
+     return sizeMax;
+   }
+
+   /**
+    * @param sizeMax the sizeMax to set; enforced on the next successful add
+    */
+   public void setSizeMax(int sizeMax) {
+     this.sizeMax = sizeMax;
+   }
+
+   /**
+    * Iterates over the words of the suggestions, in the set's order.
+    * Removal is forwarded to the underlying set iterator.
+    * @return an iterator yielding the {@code getWord()} of each element
+    */
+   public Iterator getWordIterator() {
+     final Iterator iterator = iterator();
+     return new Iterator() {
+       public boolean hasNext() {
+         return iterator.hasNext();
+       }
+       public Object next() {
+         return ((Suggestive) iterator.next()).getWord();
+       }
+       public void remove() {
+         iterator.remove();
+       }
+     };
+   }
+
+ }
\ No newline at end of file
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/AbstractTextTermQueryFilter.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/AbstractTextTermQueryFilter.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/AbstractTextTermQueryFilter.java (revision 0)
@@ -0,0 +1,26 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+/**
+ * @author mlecarme
+ *
+ */
+public abstract class AbstractTextTermQueryFilter implements TermQueryFilter{
+
+ public Query filter(TermQuery termQuery) {
+ Term term = termQuery.getTerm();
+ Term filteredTerm = new Term(term.field(), filter(term.text()));
+ TermQuery filteredTermQuery = new TermQuery(filteredTerm);
+ filteredTermQuery.setBoost(termQuery.getBoost());
+ return filteredTermQuery;
+ }
+
+ public abstract String filter(String txt);
+
+}
Index: contrib/lexicon/src/java/org/apache/lucene/lexicon/SuggestiveHits.java
===================================================================
--- contrib/lexicon/src/java/org/apache/lucene/lexicon/SuggestiveHits.java (revision 0)
+++ contrib/lexicon/src/java/org/apache/lucene/lexicon/SuggestiveHits.java (revision 0)
@@ -0,0 +1,67 @@
+/**
+ *
+ */
+package org.apache.lucene.lexicon;
+
+import java.util.Map;
+
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Query;
+
+/**
+ * @author mlecarme
+ *
+ */
+public class SuggestiveHits {
+ private Hits hits;
+ private Map suggestions;// String -> Suggestions
+ private Query query;
+
+ /**
+ * @param hits
+ * @param suggestions
+ */
+ public SuggestiveHits(Hits hits, Map suggestions, Query query) {
+ this.hits = hits;
+ this.suggestions = suggestions;
+ this.query = query;
+ }
+
+ /**
+ * @return the hits
+ */
+ public Hits getHits() {
+ return hits;
+ }
+
+ /**
+ * @return the suggestions
+ */
+ public Suggestions getSuggestions(String word) {
+ return (Suggestions)suggestions.get(word);
+ }
+
+ public Map getSuggestions() {
+ return suggestions;
+ }
+
+ public String getSuggestedQuery() {
+ if(! isSuggested())
+ return null;
+ return QueryUtils.filter(this.query, new AbstractTextTermQueryFilter() {
+ public String filter(String txt) {
+ if(! suggestions.containsKey(txt))
+ return txt;
+ Suggestions suggested = (Suggestions)suggestions.get(txt);
+ if(suggested.isEmpty())
+ return txt;
+ return (String)suggested.getWordIterator().next();
+ }
+ }).toString();
+ }
+
+ public boolean isSuggested() {
+ return suggestions != null && suggestions.size() > 0;
+ }
+
+}
Index: contrib/lexicon/pom.xml
===================================================================
Index: contrib/lexicon/build.xml
===================================================================
--- contrib/lexicon/build.xml (revision 0)
+++ contrib/lexicon/build.xml (revision 0)
@@ -0,0 +1,84 @@
+
+
+
+
+
+
+
+ Lexicon
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ XML Parser building dependency ${spellchecker.jar}
+
+
+
+
+ XML Parser building dependency ${aphone.jar}
+
+
+
+
+ XML Parser building dependency ${snowball.jar}
+
+
+
+
+ XML Parser building dependency ${analyzers.jar}
+
+
+
+
+
+
+
+
+
+
Index: contrib/aphone/src/test/org/apache/lucene/aphone/TestAphoneTokenFilter.java
===================================================================
--- contrib/aphone/src/test/org/apache/lucene/aphone/TestAphoneTokenFilter.java (revision 0)
+++ contrib/aphone/src/test/org/apache/lucene/aphone/TestAphoneTokenFilter.java (revision 0)
@@ -0,0 +1,46 @@
+package org.apache.lucene.aphone;
+
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+/**
+ *
+ * @Author Mathieu Lecarme
+ */
+public class TestAphoneTokenFilter extends TestCase{
+ public void testFrench() throws IOException{
+ String test = "Les phonétiques vont sauver le monde";
+ TokenStream stream = new AphoneTokenFilter ( new StandardTokenizer(new StringReader(test)), new AphoneFr());
+ Token tok;
+ for(;;){
+ tok = stream.next();
+ if(tok == null)
+ break;
+ System.out.println(tok.termText());
+ }
+ }
+}
Index: contrib/aphone/src/test/org/apache/lucene/aphone/TestAphone.java
===================================================================
--- contrib/aphone/src/test/org/apache/lucene/aphone/TestAphone.java (revision 0)
+++ contrib/aphone/src/test/org/apache/lucene/aphone/TestAphone.java (revision 0)
@@ -0,0 +1,38 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+
+/**
+ *
+ * @Author Mathieu Lecarme
+ */
+public class TestAphone extends TestCase {
+
+ public void testFrench() {
+ AphoneFr fr = new AphoneFr();
+ assertEquals("LUSEME", fr.toPhone("Lucene"));
+ }
+
+ public void testEnglish() {
+ AphoneEn en = new AphoneEn();
+ assertEquals("LSN", en.toPhone("Lucene"));
+ }
+
+}
\ No newline at end of file
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneDa.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneDa.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneDa.java (revision 0)
@@ -0,0 +1,475 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @Author Mathieu Lecarme
+ */
+public class AphoneDa extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( word.length() >= 2 && word.substring(0, 2).equals("AA") ) {
+ this.append("Å");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("ACTION") ) {
+ this.append("AKSJON");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AF") ) {
+ this.append("AV");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ASIE") ) {
+ this.append("ASJE");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("A") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("BEDST") ) {
+ this.append("BEST");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("BORD") ) {
+ this.append("BOR");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("BRYST") ) {
+ this.append("BRØST");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("BUREAU") ) {
+ this.append("BYRO");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("B") ) {
+ this.append("B");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CC") ) {
+ this.append("KS");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CK") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CH") ) {
+ this.append("TJ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CI") ) {
+ this.append("SI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CO") ) {
+ this.append("KO");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CY") ) {
+ this.append("SY");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("C") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("DIG") && word.length() == 3) {
+ this.append("DAJ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("DIG") ) {
+ this.append("DI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("D") && word.length() == 1) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("D") ) {
+ this.append("D");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EAUX") ) {
+ this.append("O");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EAU") ) {
+ this.append("O");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EJ") && word.length() == 2) {
+ this.append("AJ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EU") ) {
+ this.append("ØV");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("E") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("É") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("È") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("FEDT") ) {
+ this.append("FET");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("F") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("G") ) {
+ this.append("G");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("HJ") ) {
+ this.append("J");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("HÅRD") ) {
+ this.append("HÅR");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("HÅND") ) {
+ this.append("HÅN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("H") ) {
+ this.append("H");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ION") ) {
+ this.append("JON");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("IND") ) {
+ this.append("IN");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("I") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("J") ) {
+ this.append("J");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("K") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("LIG") ) {
+ this.append("LI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("L") ) {
+ this.append("L");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("MAND") ) {
+ this.append("MAN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("MIG") && word.length() == 3) {
+ this.append("MAJ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("M") ) {
+ this.append("M");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("N") ) {
+ this.append("N");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OST") ) {
+ this.append("ÅST");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("O") ) {
+ this.append("O");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ó") ) {
+ this.append("O");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PH") ) {
+ this.append("F");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("P") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Q") ) {
+ this.append("KU");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("REGN") ) {
+ this.append("REJN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("RUG") ) {
+ this.append("RU");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("RYG") ) {
+ this.append("RØG");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("R") ) {
+ this.append("R");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SH") ) {
+ this.append("SJ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("SIG") && word.length() == 3) {
+ this.append("SAJ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("SKIND") ) {
+ this.append("SKIN");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("S'S") && word.length() == 3) {
+ this.append("S");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("TION") ) {
+ this.append("SJON");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TZ") ) {
+ this.append("TS");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("T") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("U") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ü") ) {
+ this.append("Y");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("V") ) {
+ this.append("V");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("W") ) {
+ this.append("V");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("X'S") ) {
+ this.append("KS");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("KS");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("YKK") ) {
+ this.append("ØKK");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("YND") ) {
+ this.append("ØND");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Y") ) {
+ this.append("Y");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("Z'S") ) {
+ this.append("S");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Z") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Æ") ) {
+ this.append("Æ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ä") ) {
+ this.append("Æ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ØB") ) {
+ this.append("ØV");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ø") ) {
+ this.append("Ø");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ö") ) {
+ this.append("Ø");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Å") ) {
+ this.append("Å");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneDa aphone = new AphoneDa();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneFo.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneFo.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneFo.java (revision 0)
@@ -0,0 +1,435 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @Author Mathieu Lecarme
+ */
+public class AphoneFo extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( word.length() >= 2 && word.substring(0, 2).equals("AA") ) {
+ this.append("Å");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("AFT") ) {
+ this.append("AT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AH") && word.length() == 2) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("A") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Á") ) {
+ this.append("Á");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("B") ) {
+ this.append("B");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CC") ) {
+ this.append("KK");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CK") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("CHR") ) {
+ this.append("KR");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CH") ) {
+ this.append("SJ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CI") ) {
+ this.append("SI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CO") ) {
+ this.append("KO");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CY") ) {
+ this.append("SY");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("C") ) {
+ this.append("C");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("D") ) {
+ this.append("D");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ÐUR") ) {
+ this.append("VUR");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ð") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EAUX") ) {
+ this.append("O");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EAU") ) {
+ this.append("O");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EUS") ) {
+ this.append("ØVS");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("E") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("É") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("È") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("F") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("G") ) {
+ this.append("G");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("HJ") ) {
+ this.append("J");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("HÅRD") ) {
+ this.append("HÅR");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("HÅND") ) {
+ this.append("HÅN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("H") ) {
+ this.append("H");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("IÐ") && word.length() == 2) {
+ this.append("Í");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("IÐ") && word.length() == 2) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("IND") ) {
+ this.append("IN");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("I") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Í") ) {
+ this.append("Í");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("J") ) {
+ this.append("J");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("KE") ) {
+ this.append("TJE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("K") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("LIG") ) {
+ this.append("LI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("L") ) {
+ this.append("L");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("M") ) {
+ this.append("M");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("N") ) {
+ this.append("N");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OCH") ) {
+ this.append("OK");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("O") ) {
+ this.append("O");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ó") ) {
+ this.append("Ó");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PH") ) {
+ this.append("F");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("P") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Q") ) {
+ this.append("KU");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("R") ) {
+ this.append("R");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SH") ) {
+ this.append("SJ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("SIÓN") ) {
+ this.append("SJÓN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("S'S") && word.length() == 3) {
+ this.append("S");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TH") && word.length() == 2) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("TIÓN") ) {
+ this.append("SJÓN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("T") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("U") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ú") ) {
+ this.append("Ú");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ü") ) {
+ this.append("Y");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("V") ) {
+ this.append("V");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("W") ) {
+ this.append("V");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("X'S") && word.length() == 3) {
+ this.append("KS");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("KS");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Y") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ý") ) {
+ this.append("Í");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("Z'S") && word.length() == 3) {
+ this.append("S");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Z") && word.length() == 1) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Z") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Æ") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ä") ) {
+ this.append("Æ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ØRN") ) {
+ this.append("ØDN");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ø") ) {
+ this.append("Ø");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ö") ) {
+ this.append("Ø");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Å") ) {
+ this.append("Á");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneFo aphone = new AphoneFo();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneRu.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneRu.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneRu.java (revision 0)
@@ -0,0 +1,595 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @Author Mathieu Lecarme
+ */
+public class AphoneRu extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( word.length() >= 2 && word.substring(0, 1).equals("А") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("АЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("А") ) {
+ this.append("А");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Б") && "БП".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Б") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("БА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("БЬ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("БЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("БЪ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("БЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Б") ) {
+ this.append("П");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("В") && "ВФ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("В") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ВА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ВЬ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ВЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ВЪ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ВЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("В") ) {
+ this.append("Ф");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Г") && "ГК".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Г") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ГА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ГЬ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ГЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ГЪ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ГЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Г") ) {
+ this.append("К");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Д") && "ДТ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Д") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ДА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ДЬ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ДЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ДЪ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ДЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ДЗ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ДЗА");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Д") && "ЗС".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Ц");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Д") ) {
+ this.append("Т");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("Е") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Е") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЯЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Е") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("Ё") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ё") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЯЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ё") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ж") && "ЖШ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ж") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЖА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЖЬ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ЖЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЖЪ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ЖЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ж") ) {
+ this.append("Ш");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("З") && "ЗС".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("З") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЗА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЗЬ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ЗЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЗЪ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ЗЯ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("З") ) {
+ this.append("С");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("И") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("АЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("И") ) {
+ this.append("А");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЙЙ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Й") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Я");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Й") ) {
+ this.append("Й");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("К") && "ГК".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("К") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("КА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("К") ) {
+ this.append("К");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЛЛ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Л") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЛА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Л") ) {
+ this.append("Л");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ММ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("М") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("МА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("М") ) {
+ this.append("М");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("НН") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Н") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("НА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Н") ) {
+ this.append("Н");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("О") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("АЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("О") ) {
+ this.append("А");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("П") && "БП".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("П") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ПА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("П") ) {
+ this.append("П");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("РР") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Р") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("РА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Р") ) {
+ this.append("Р");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("С") && "ЗС".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("С") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("СА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("С") ) {
+ this.append("С");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Т") && "ДТ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Т") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ТА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ТЗ") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ТЗА");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Т") && "ЗС".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Ц");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Т") ) {
+ this.append("Т");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("У") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("АЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("У") ) {
+ this.append("А");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ф") && "ВФ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ф") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ФА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ф") ) {
+ this.append("Ф");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ХХ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Х") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ХА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Х") ) {
+ this.append("Х");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЦЦ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ц") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЦА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ц") ) {
+ this.append("Ц");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЧЧ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ч") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЧА");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ч") ) {
+ this.append("Ч");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ш") && "ЖШ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ш") && "АОЭЫУЯЁЕИЮ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ША");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ш") ) {
+ this.append("Ш");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Щ") ) {
+ this.append("Ш");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ъ") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Я");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ъ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ы") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("АЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ы") ) {
+ this.append("А");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ь") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Я");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ь") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Э") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("АЯ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Э") ) {
+ this.append("А");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("Ю") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ю") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЯЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ю") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("Я") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Я") && "ЕЁЮЯ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ЯЯ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Я") ) {
+ this.append("Я");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneRu aphone = new AphoneRu();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneTokenFilter.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneTokenFilter.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneTokenFilter.java (revision 0)
@@ -0,0 +1,45 @@
+package org.apache.lucene.aphone;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Token filter that replaces the text of every token coming from the wrapped
+ * stream with the phonetic code computed by an {@link Aphone} encoder.
+ */
+public class AphoneTokenFilter extends TokenFilter {
+    /** Encoder applied to each token's text; never null. */
+    private final Aphone aphone;
+
+    /**
+     * Creates a filter that encodes tokens with {@link AphoneEn}.
+     *
+     * @param input the token stream to wrap
+     */
+    protected AphoneTokenFilter(TokenStream input) {
+        this(input, new AphoneEn());
+    }
+
+    /**
+     * Creates a filter that encodes tokens with the given encoder.
+     *
+     * @param input the token stream to wrap
+     * @param aphone the phonetic encoder to apply to each token
+     * @throws IllegalArgumentException if <code>aphone</code> is null
+     */
+    public AphoneTokenFilter(TokenStream input, Aphone aphone) {
+        super(input);
+        // Fail here rather than with a NullPointerException on the first token.
+        if (aphone == null) {
+            throw new IllegalArgumentException("aphone must not be null");
+        }
+        this.aphone = aphone;
+    }
+
+    /**
+     * Reads the next token from the wrapped stream and overwrites its text
+     * with the phonetic code of that text. The token instance returned by the
+     * wrapped stream is modified in place and handed back.
+     *
+     * @return the re-encoded token, or null when the wrapped stream is exhausted
+     * @throws IOException if the wrapped stream fails
+     */
+    public Token next() throws IOException {
+        Token token = input.next();
+        if (token == null) {
+            return null;
+        }
+        // TODO: switch to Token#termBuffer when moving to Lucene 2.4.
+        token.setTermText(aphone.toPhone(token.termText()));
+        return token;
+    }
+}
\ No newline at end of file
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneFr.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneFr.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneFr.java (revision 0)
@@ -0,0 +1,500 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @Author Mathieu Lecarme
+ */
+public class AphoneFr extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( word.length() >= 3 && word.substring(0, 3).equals("AIX") && word.length() == 3) {
+ this.append("E");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AI") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AN") && "AEUIO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("AM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AN") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AMM") ) {
+ this.append("AM");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AM") && "AEUIO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("AM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AM") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AUD") && word.length() == 3) {
+ this.append("O");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AUX") && word.length() == 3) {
+ this.append("O");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AU") ) {
+ this.append("O");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("A") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Â") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("À") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("BB") ) {
+ this.append("P");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("B") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ç") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("C") && "EI".indexOf(word.charAt(1)) != -1 ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CU") && "EI".indexOf(word.charAt(2)) != -1 ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CC") && "EI".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CC") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CH") ) {
+ this.append("CH");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("C") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("DD") ) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("D") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 8 && word.substring(0, 8).equals("EMMENTAL") ) {
+ this.append("EMATAL");
+ this.word = this.word.substring(8);
+ return true;
+ }
+ if( word.length() >= 9 && word.substring(0, 9).equals("EMMENTHAL") ) {
+ this.append("EMATAL");
+ this.word = this.word.substring(9);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EM") && "AEIOU".indexOf(word.charAt(2)) != -1 ) {
+ this.append("EM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EM") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ET") && word.length() == 2) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EUX") && word.length() == 3) {
+ this.append("E");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EU") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EN") && "AEUIO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("EM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EN") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ER") && word.length() == 2) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EO") ) {
+ this.append("O");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EAUX") && word.length() == 4) {
+ this.append("O");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EAU") ) {
+ this.append("O");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("E") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("È") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("É") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ê") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("F") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("G") && "EIY".indexOf(word.charAt(1)) != -1 ) {
+ this.append("J");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("GU") && "EIY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("G");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("G") ) {
+ this.append("G");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("H") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("I") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Î") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("J") ) {
+ this.append("J");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("KS") ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("K") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("LL") ) {
+ this.append("L");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("L") ) {
+ this.append("L");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("MM") ) {
+ this.append("M");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("M") ) {
+ this.append("M");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("NN") ) {
+ this.append("M");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("N") ) {
+ this.append("M");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OEU") ) {
+ this.append("E");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OUX") && word.length() == 3) {
+ this.append("U");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("OU") ) {
+ this.append("U");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("OÙ") ) {
+ this.append("U");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("O") ) {
+ this.append("O");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ô") ) {
+ this.append("O");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PP") ) {
+ this.append("P");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PH") ) {
+ this.append("F");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("P") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("QU") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Q") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("RIX") && word.length() == 3) {
+ this.append("RI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("RR") ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("R") ) {
+ this.append("R");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") && word.length() == 1) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SS") ) {
+ this.append("S");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TT") ) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("T") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("U") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ù") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Û") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("V") ) {
+ this.append("V");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("W") ) {
+ this.append("W");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("X");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Y") && "AEOU".indexOf(word.charAt(1)) != -1 ) {
+ this.append("IL");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Y") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ZZ") ) {
+ this.append("S");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Z") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneFr aphone = new AphoneFr();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneBg.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneBg.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneBg.java (revision 0)
@@ -0,0 +1,710 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Converts a string to its phonetic ("phone") transcription.
+ * @author Mathieu Lecarme
+ */
+public class AphoneBg extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+  * Converts a word to its phonetic transcription.
+  * <p>
+  * The word is upper-cased, then handed to <code>eat()</code> repeatedly
+  * until nothing is left of it. <code>starting</code> is true only for the
+  * first call, i.e. while the beginning of the word is being examined.
+  *
+  * @param word the word to transcribe; may be null
+  * @return the phonetic transcription, or null if <code>word</code> is null
+  */
+ public String toPhone(String word) {
+     if (word == null) {
+         return null;
+     }
+     this.phone = new StringBuffer();
+     // Fixed locale so the result does not depend on the JVM default locale
+     // (some locales upper-case Latin "i" differently).
+     this.word = word.toUpperCase(java.util.Locale.ENGLISH);
+     this.starting = true;
+     // NOTE(review): relies on every eat() call shortening this.word.
+     while (this.word.length() > 0) {
+         this.eat();
+         this.starting = false;
+     }
+     return this.phone.toString();
+ }
+
+ private void append(String letter) {
+     // Nothing to emit for an empty replacement.
+     if (letter.length() == 0) {
+         return;
+     }
+     int size = this.phone.length();
+     boolean repeatsLast = size > 0 && this.phone.charAt(size - 1) == letter.charAt(0);
+     // When the replacement starts with the character emitted last, drop that
+     // first character so it is not doubled in the output.
+     this.phone.append(repeatsLast ? letter.substring(1) : letter);
+ }
+
+ private boolean eat() {
+ if( word.length() >= 1 && word.substring(0, 1).equals("А") ) {
+ this.append("Ъ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ъ") ) {
+ this.append("Ъ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("О") ) {
+ this.append("У");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("У") ) {
+ this.append("У");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Е") ) {
+ this.append("И");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("И") ) {
+ this.append("И");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ю") ) {
+ this.append("У");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Я") ) {
+ this.append("Ъ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Й") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ь") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("БСК") ) {
+ this.append("ПК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("БД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("БT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("БД") && word.length() == 2) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("БT") && word.length() == 2) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Б") ) {
+ this.append("П");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ВСК") ) {
+ this.append("ФК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ВД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ВT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ВД") && word.length() == 2) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ВT") && word.length() == 2) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("В") ) {
+ this.append("Ф");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ГСК") ) {
+ this.append("К");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ГД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ГT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ГД") && word.length() == 2) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ГT") && word.length() == 2) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Г") ) {
+ this.append("К");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ДСК") ) {
+ this.append("ТК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ДД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ДT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ДД") && word.length() == 2) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ДT") && word.length() == 2) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Д") ) {
+ this.append("Т");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ЖСК") ) {
+ this.append("ШК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЖД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЖT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЖД") && word.length() == 2) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЖT") && word.length() == 2) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ж") ) {
+ this.append("Ш");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ЗСК") ) {
+ this.append("СК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЗД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЗT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЗД") && word.length() == 2) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЗT") && word.length() == 2) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("З") ) {
+ this.append("С");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("КСК") ) {
+ this.append("К");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("КД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("КT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("КД") && word.length() == 2) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("КT") && word.length() == 2) {
+ this.append("К");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("К") ) {
+ this.append("К");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ЛСК") ) {
+ this.append("ЛК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЛД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Л");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЛT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Л");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЛД") && word.length() == 2) {
+ this.append("Л");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЛT") && word.length() == 2) {
+ this.append("Л");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Л") ) {
+ this.append("Л");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("МСК") ) {
+ this.append("МК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("МД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("М");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("МT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("М");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("МД") && word.length() == 2) {
+ this.append("М");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("МT") && word.length() == 2) {
+ this.append("М");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("М") ) {
+ this.append("М");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("НСК") ) {
+ this.append("НК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("НД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Н");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("НT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Н");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("НД") && word.length() == 2) {
+ this.append("Н");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("НT") && word.length() == 2) {
+ this.append("Н");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Н") ) {
+ this.append("Н");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ПСК") ) {
+ this.append("ПК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ПД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ПT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ПД") && word.length() == 2) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ПT") && word.length() == 2) {
+ this.append("П");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("П") ) {
+ this.append("П");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("РСК") ) {
+ this.append("РК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("РД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Р");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("РT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Р");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("РД") && word.length() == 2) {
+ this.append("Р");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("РT") && word.length() == 2) {
+ this.append("Р");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Р") ) {
+ this.append("Р");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ССК") ) {
+ this.append("СК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("СД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("СT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("СД") && word.length() == 2) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("СT") && word.length() == 2) {
+ this.append("С");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("С") ) {
+ this.append("С");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ТСК") ) {
+ this.append("ТК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ТД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ТT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ТД") && word.length() == 2) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ТT") && word.length() == 2) {
+ this.append("Т");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Т") ) {
+ this.append("Т");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ФСК") ) {
+ this.append("ФК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ФД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ФT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ФД") && word.length() == 2) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ФT") && word.length() == 2) {
+ this.append("Ф");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ф") ) {
+ this.append("Ф");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ХСК") ) {
+ this.append("ХК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ХД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Х");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ХT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Х");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ХД") && word.length() == 2) {
+ this.append("Х");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ХT") && word.length() == 2) {
+ this.append("Х");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Х") ) {
+ this.append("Х");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ЦСК") ) {
+ this.append("ЦК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЦД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ц");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЦT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ц");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЦД") && word.length() == 2) {
+ this.append("Ц");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЦT") && word.length() == 2) {
+ this.append("Ц");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ц") ) {
+ this.append("Ц");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ЧСК") ) {
+ this.append("ЧК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЧД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ч");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ЧT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ч");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЧД") && word.length() == 2) {
+ this.append("Ч");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ЧT") && word.length() == 2) {
+ this.append("Ч");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ч") ) {
+ this.append("Ч");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ШСК") ) {
+ this.append("ШК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ШД") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ШT") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ШД") && word.length() == 2) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ШT") && word.length() == 2) {
+ this.append("Ш");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ш") ) {
+ this.append("Ш");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ЩСК") ) {
+ this.append("ШК");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Щ") && "БВГДЖЗКЛМНПРСТГХЦЧШЩ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Ш");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Щ") && word.length() == 1) {
+ this.append("Ш");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Щ") ) {
+ this.append("Щ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Command-line smoke test: prints the phonetic code of each argument on its own line.
+ */
+ public static void main(String[] args) {
+ final AphoneBg encoder = new AphoneBg();
+ for (int pos = 0; pos != args.length; ++pos) {
+ System.out.println(encoder.toPhone(args[pos]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneDe.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneDe.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneDe.java (revision 0)
@@ -0,0 +1,2405 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @author Mathieu Lecarme
+ */
+public class AphoneDe extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /** Encodes a word phonetically: upper-cases it, then calls eat() until the working copy is empty.
+ * @param word the word to encode
+ * @return phonetic transcription, or null when word is null
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer(); // working state lives in instance fields: do not share an instance across threads
+ this.word = word.toUpperCase(java.util.Locale.ENGLISH); // fixed locale: default-locale casing (e.g. Turkish i -> dotted capital I) would miss the ASCII rule literals
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false; // only the first eat() call sees the start-of-word flag
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( word.length() >= 3 && word.substring(0, 3).equals("ÄER") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ÄU") ) {
+ this.append("EU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ä") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("É") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ÖER") ) {
+ this.append("Ö");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ö") ) {
+ this.append("Ö");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("ÜBER") ) {
+ this.append("IPA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ÜER") ) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ü") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("ß") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("ABELLE") && word.length() == 6) {
+ this.append("APL");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("ABELL") && word.length() == 5) {
+ this.append("APL");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 7).equals("ABIENNE") && word.length() == 7) {
+ this.append("APIN");
+ this.word = this.word.substring(7);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ACEY") && word.length() == 4) {
+ this.append("AZI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AEU") ) {
+ this.append("EU");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AE") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("AGNI") ) {
+ this.append("AKN");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("AGNIE") ) {
+ this.append("ANI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("AGN") && "AEOU".indexOf(word.charAt(3)) != -1 && word.length() == 4) {
+ this.append("ANI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AIA") ) {
+ this.append("AIA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AIE") && word.length() == 3) {
+ this.append("E");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 4).equals("AILL") && "EOU".indexOf(word.charAt(4)) != -1 ) {
+ this.append("ALI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("AINE") && word.length() == 4) {
+ this.append("EN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("AIRE") && word.length() == 4) {
+ this.append("ER");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AIR") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("AISE") && word.length() == 4) {
+ this.append("EZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 8 && word.substring(0, 8).equals("AISSANCE") && word.length() == 8) {
+ this.append("EZANZ");
+ this.word = this.word.substring(8);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("AISSE") && word.length() == 5) {
+ this.append("EZ");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AIX") && word.length() == 3) {
+ this.append("EX");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AJ") && "AÄEIOÖUÜ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("AKTIE") ) {
+ this.append("AXIE");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("ALO") && "IY".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ALUI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 6).equals("AMATEU") && "RS".indexOf(word.charAt(6)) != -1 ) {
+ this.append("ANATÖ");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 7).equals("ANIELLE") && word.length() == 7) {
+ this.append("ANIL");
+ this.word = this.word.substring(7);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("ANTI") ) {
+ this.append("ANTI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("ANVER") ) {
+ this.append("ANFA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ATIA") && word.length() == 4) {
+ this.append("ATIA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 4).equals("ATIA") && "NS".indexOf(word.charAt(4)) != -1 ) {
+ this.append("ATI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("ATI") && "AÄOÖUÜ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("AZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("AUAU") ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("AUER") ) {
+ this.append("AUA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("AUF") ) {
+ this.append("AUF");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("AULT") && word.length() == 4) {
+ this.append("U");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("AUSSE") && word.length() == 5) {
+ this.append("UZ");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("AUS") && "ST".indexOf(word.charAt(3)) != -1 ) {
+ this.append("AUZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("AUS") ) {
+ this.append("AUZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("AUTO") ) {
+ this.append("AUTU");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("AUX") && "IY".indexOf(word.charAt(3)) != -1 ) {
+ this.append("AUX");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AUX") ) {
+ this.append("U");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AU") ) {
+ this.append("AU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("AVIER") && word.length() == 5) {
+ this.append("AFIE");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("AYER") ) {
+ this.append("EI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AY") && "AÄEIOÖUÜ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("A") && "IJY".indexOf(word.charAt(1)) != -1 ) {
+ this.append("EI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("A") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("BEA") && "BCMNRU".indexOf(word.charAt(3)) != -1 ) {
+ this.append("PEA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 4).equals("BEAT") && "AEIMORU".indexOf(word.charAt(4)) != -1 ) {
+ this.append("PEAT");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("BEIGE") && word.length() == 5) {
+ this.append("PEZ");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("BE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("PE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("BETTE") && word.length() == 5) {
+ this.append("PET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("BIC") && word.length() == 3) {
+ this.append("PIZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 4).equals("BOWL") && "EI".indexOf(word.charAt(4)) != -1 ) {
+ this.append("PUL");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("BP") && "AÄEIOÖRUÜY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("P");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("BUDGET") ) {
+ this.append("PIKE");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("BUFFET") ) {
+ this.append("PIFE");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("BYLLE") && word.length() == 5) {
+ this.append("PILE");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("BYLL") && word.length() == 4) {
+ this.append("PIL");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("BYTE") ) {
+ this.append("PEIT");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("B") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CÄ") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CÜ") && word.length() == 2) {
+ this.append("ZI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 4).equals("CACH") && "EI".indexOf(word.charAt(4)) != -1 ) {
+ this.append("KEZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("CAE") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CA") && "IY".indexOf(word.charAt(2)) != -1 && word.length() == 3) {
+ this.append("ZEI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("CCH") ) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("CCE") ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CE") && "EIJUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("CENT") ) {
+ this.append("ZENT");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 5).equals("CERST") && "EI".indexOf(word.charAt(5)) != -1 ) {
+ this.append("KE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("CER") && word.length() == 3) {
+ this.append("ZA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CE") ) {
+ this.append("ZE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 4).equals("CHAO") && "ST".indexOf(word.charAt(4)) != -1 ) {
+ this.append("KAU");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 7 && word.substring(0, 7).equals("CHAMPIO") ) {
+ this.append("ZENPI");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 4).equals("CHAR") && "AI".indexOf(word.charAt(4)) != -1 ) {
+ this.append("KAR");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 4).equals("CHAU") && "CDFSVWXZ".indexOf(word.charAt(4)) != -1 ) {
+ this.append("ZU");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("CHE") && "CF".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ZE");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("CHEM") ) {
+ this.append("KE");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("CHEQUE") ) {
+ this.append("ZEK");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("CHI") && "CFGPVW".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("CH") && "AEUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("CHK") ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("CH") && "LOR".indexOf(word.charAt(2)) != -1 ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("CHST") ) {
+ this.append("X");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CH") && "SßXZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CH") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("CIER") && word.length() == 4) {
+ this.append("ZIE");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("CYB") ) {
+ this.append("ZEI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("CY") ) {
+ this.append("ZI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("C") && "IJY".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("CKST") ) {
+ this.append("XT");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CK") && "SßXZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("C") && "CK".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 7).equals("CLAUDET") ) {
+ this.append("KLU");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 8 && word.substring(0, 8).equals("CLAUDINE") && word.length() == 8) {
+ this.append("KLUTIN");
+ this.word = this.word.substring(8);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("COLE") && word.length() == 4) {
+ this.append("KUL");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("COUCH") ) {
+ this.append("KAUZ");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("CQUES") && word.length() == 5) {
+ this.append("K");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("CQUE") ) {
+ this.append("K");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("CREAT") ) {
+ this.append("KREA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("CST") ) {
+ this.append("XT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("CS") ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("C") && "SßX".indexOf(word.charAt(1)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CT") && "SßXZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CZ") ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("C") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("D'H") ) {
+ this.append("T");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("D'S") && word.length() == 3) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 4).equals("DAVO") && "NR".indexOf(word.charAt(4)) != -1 && word.length() == 5) {
+ this.append("TAFU");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("DD") && "SZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("DEPOT") ) {
+ this.append("TEPU");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("DESIGN") ) {
+ this.append("TIZEIN");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("DE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("TE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("DETTE") && word.length() == 5) {
+ this.append("TET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("DIC") && word.length() == 3) {
+ this.append("TIZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("DJ") && "AEIOU".indexOf(word.charAt(2)) != -1 ) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("DS") && "CH".indexOf(word.charAt(2)) != -1 ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("DST") ) {
+ this.append("ZT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("DT") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("DUIS") ) {
+ this.append("TI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("DURCH") ) {
+ this.append("TURK");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("DZS") && "CH".indexOf(word.charAt(3)) != -1 ) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("D") && "SßZ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("D") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("EAULT") && word.length() == 5) {
+ this.append("U");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EAUX") && word.length() == 4) {
+ this.append("U");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EAU") ) {
+ this.append("U");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EAV") ) {
+ this.append("IF");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EA") && "AÄEIOÖÜY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("EA");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EA") && word.length() == 2) {
+ this.append("EA");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EA") ) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("EBEN") ) {
+ this.append("EPN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EE") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EIEI") ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EIH") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("EILLE") && word.length() == 5) {
+ this.append("EI");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EI") ) {
+ this.append("EI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EJ") && word.length() == 2) {
+ this.append("EI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("EL") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EL") && "DKL".indexOf(word.charAt(2)) != -1 ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EL") && "MNT".indexOf(word.charAt(2)) != -1 && word.length() == 3) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("ELYNE") && word.length() == 5) {
+ this.append("ELINE");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ELYN") && word.length() == 4) {
+ this.append("ELIN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EL") && "AÄEIOÖUÜY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("EL");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EL") ) {
+ this.append("L");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("EM") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EM") && "DFKMPQT".indexOf(word.charAt(2)) != -1 ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EM") && "AÄEIOÖUÜY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EM") ) {
+ this.append("N");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("EN") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EN") && "CDGKQT".indexOf(word.charAt(2)) != -1 ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("ENZ") && "AEIOUY".indexOf(word.charAt(3)) != -1 ) {
+ this.append("EN");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EN") && "AÄEINOÖUÜY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("EN");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EN") ) {
+ this.append("N");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("ERH") && "AÄEIOÖUÜ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ER");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("ER") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ER") && "AÄEIOÖUÜY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ER") && word.length() == 2) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ER") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("ETI") && "AÄOÖÜU".indexOf(word.charAt(3)) != -1 ) {
+ this.append("EZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EUEU") ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("EUILLE") && word.length() == 6) {
+ this.append("Ö");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EUR") && word.length() == 3) {
+ this.append("ÖR");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EUX") ) {
+ this.append("Ö");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EUYS") && word.length() == 4) {
+ this.append("EUZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EU") ) {
+ this.append("EU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("EYER") ) {
+ this.append("EIA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EY") ) {
+ this.append("EI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("E") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("FANS") && word.length() == 4) {
+ this.append("FE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("FAN") && word.length() == 3) {
+ this.append("FE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("FAULT") ) {
+ this.append("FUL");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("FEE") && "DL".indexOf(word.charAt(3)) != -1 ) {
+ this.append("FI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("FEHLER") ) {
+ this.append("FELA");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("FE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("FE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("FOND") ) {
+ this.append("FUN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("FRAIN") && word.length() == 5) {
+ this.append("FRA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 6).equals("FRISEU") && "RS".indexOf(word.charAt(6)) != -1 ) {
+ this.append("FRIZÖ");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("F") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("G'S") && word.length() == 3) {
+ this.append("X");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("GAGS") && word.length() == 4) {
+ this.append("KEX");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("GAG") && word.length() == 3) {
+ this.append("KEK");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("GD") ) {
+ this.append("KT");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("GEGEN") ) {
+ this.append("KEKN");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("GE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("KE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("GETTE") && word.length() == 5) {
+ this.append("KET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("G") && "CK".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("GG") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("GI") && "AO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("GION") && word.length() == 4) {
+ this.append("KIUN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("GIUS") ) {
+ this.append("IU");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("GMBH") && word.length() == 4) {
+ this.append("GMPH");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("GNAC") && word.length() == 4) {
+ this.append("NIAK");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("GNON") && word.length() == 4) {
+ this.append("NIUN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("GN") && word.length() == 2) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("GONCAL") ) {
+ this.append("KUNZA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("GS") && "CH".indexOf(word.charAt(2)) != -1 ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("GST") ) {
+ this.append("XT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("G") && "SßXZ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("GUCK") ) {
+ this.append("KU");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("GUI") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("G") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("HEAD") ) {
+ this.append("E");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("HE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("HE") && "LMN".indexOf(word.charAt(2)) != -1 ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("HEUR") && word.length() == 4) {
+ this.append("ÖR");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("H") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("IEC") && word.length() == 3) {
+ this.append("IZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("IEI") ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("IELL") ) {
+ this.append("IEL");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("IENNE") && word.length() == 5) {
+ this.append("IN");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("IERRE") && word.length() == 5) {
+ this.append("IER");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("IETTE") && word.length() == 5) {
+ this.append("IT");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("IEU") ) {
+ this.append("IÖ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("IE") ) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("IGHT") && word.length() == 4) {
+ this.append("EIT");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 4).equals("IGNI") && "EO".indexOf(word.charAt(4)) != -1 ) {
+ this.append("INI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("IGN") && "AEOU".indexOf(word.charAt(3)) != -1 && word.length() == 4) {
+ this.append("INI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("IJ") && "AOU".indexOf(word.charAt(2)) != -1 ) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("IJ") && word.length() == 2) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("IJ") ) {
+ this.append("EI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("IKOLE") && word.length() == 5) {
+ this.append("IKUL");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 5).equals("ILLAN") && "STZ".indexOf(word.charAt(5)) != -1 ) {
+ this.append("ILIA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 5).equals("ILLAR") && "DT".indexOf(word.charAt(5)) != -1 ) {
+ this.append("ILIA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("INVER") ) {
+ this.append("INFE");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("ITI") && "AÄOÖUÜ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("IZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("IVIER") && word.length() == 5) {
+ this.append("IFIE");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("I") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("JAVIE") ) {
+ this.append("ZA");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("JEAN") && word.length() == 4) {
+ this.append("IA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("JEAN") ) {
+ this.append("IA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("JER") ) {
+ this.append("IE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("JE") && "LMNST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("IE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("JOR") && "GK".indexOf(word.charAt(3)) != -1 && word.length() == 4) {
+ this.append("IÖRK");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("J") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("KC") && "ÄEIJ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("KE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("KE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("KH") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("KIC") && word.length() == 3) {
+ this.append("KIZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("KLE") && "LMNRST".indexOf(word.charAt(3)) != -1 ) {
+ this.append("KLE");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("KOTELE") ) {
+ this.append("KUTL");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("KREAT") ) {
+ this.append("KREA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("KST") ) {
+ this.append("XT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("K") && "SßXZ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("KTI") && "AIOU".indexOf(word.charAt(3)) != -1 ) {
+ this.append("XI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("KT") && "SßXZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("K") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("LARVE") ) {
+ this.append("LARF");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("LEAND") ) {
+ this.append("LEAN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("LEL") ) {
+ this.append("LE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("LE") && "MNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("LE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("LETTE") && word.length() == 5) {
+ this.append("LET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("LFGNAG") ) {
+ this.append("LFKAN");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("LIC") && word.length() == 3) {
+ this.append("LIZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("LIVE") && word.length() == 4) {
+ this.append("LEIF");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("LUI") && "GS".indexOf(word.charAt(3)) != -1 ) {
+ this.append("LU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("L") ) {
+ this.append("L");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 6).equals("MASSEU") && "RS".indexOf(word.charAt(6)) != -1 ) {
+ this.append("NAZÖ");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 7).equals("MAURICE") ) {
+ this.append("NURIZ");
+ this.word = this.word.substring(7);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("MBH") && word.length() == 3) {
+ this.append("MPH");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("MB") && "SßZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("MC") ) {
+ this.append("NK");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("MEMOIR") ) {
+ this.append("NENUA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("ME") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("NE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("MIGUEL") ) {
+ this.append("NIKL");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("MIKE") && word.length() == 4) {
+ this.append("NEIK");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("MN") ) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("MPJUTE") ) {
+ this.append("NPUT");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("MP") && "SßZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("MP") && "BDJLMNPQRTVW".indexOf(word.charAt(2)) != -1 ) {
+ this.append("NP");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("M") ) {
+ this.append("N");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("NACH") ) {
+ this.append("NAK");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("NADINE") ) {
+ this.append("NATIN");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("NAIV") ) {
+ this.append("NA");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("NAISE") && word.length() == 5) {
+ this.append("NEZE");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("NCOISE") && word.length() == 6) {
+ this.append("ZUA");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("NCOIS") && word.length() == 5) {
+ this.append("ZUA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("NEBEN") ) {
+ this.append("NEPN");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("NE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("NE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("NEN") ) {
+ this.append("NE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("NETTE") && word.length() == 5) {
+ this.append("NET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("NG") && "BDFJLMNPQRTVW".indexOf(word.charAt(2)) != -1 ) {
+ this.append("NK");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("NICHTS") ) {
+ this.append("NIX");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("NICHT") ) {
+ this.append("NIKT");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("NINE") && word.length() == 4) {
+ this.append("NIN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("NON") ) {
+ this.append("NUN");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("NOT") ) {
+ this.append("NUT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("NTI") && "AIOU".indexOf(word.charAt(3)) != -1 ) {
+ this.append("NZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("NTIEL") ) {
+ this.append("NZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("NYLON") ) {
+ this.append("NEILUN");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ND") && "SßZ".indexOf(word.charAt(2)) != -1 && word.length() == 3) {
+ this.append("NZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("NT") && "SßZ".indexOf(word.charAt(2)) != -1 && word.length() == 3) {
+ this.append("NZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ND'S") && word.length() == 4) {
+ this.append("NZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("NT'S") && word.length() == 4) {
+ this.append("NZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("NSTS") && word.length() == 4) {
+ this.append("NZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("N") ) {
+ this.append("N");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("OBER") ) {
+ this.append("UPA");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("OE") ) {
+ this.append("Ö");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("OGNIE") ) {
+ this.append("UNI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("OGN") && "AEOU".indexOf(word.charAt(3)) != -1 && word.length() == 4) {
+ this.append("UNI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OIE") && word.length() == 3) {
+ this.append("Ö");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OIR") && word.length() == 3) {
+ this.append("UAR");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OIX") ) {
+ this.append("UA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("OI") ) {
+ this.append("EU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("OJ") && "AÄEIOÖUÜ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("OKAY") && word.length() == 4) {
+ this.append("UKE");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("OLYN") && word.length() == 4) {
+ this.append("ULIN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("OTI") && "AÄOÖUÜ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("UZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("OUI") ) {
+ this.append("FI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("OUILLE") && word.length() == 6) {
+ this.append("ULIE");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("OU") && "DT".indexOf(word.charAt(2)) != -1 ) {
+ this.append("AU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("OUSE") && word.length() == 4) {
+ this.append("AUZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OUT") ) {
+ this.append("AU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("OU") ) {
+ this.append("U");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OWS") && word.length() == 3) {
+ this.append("UZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("OY") && "AÄEIOÖUÜ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("O") && "JY".indexOf(word.charAt(1)) != -1 ) {
+ this.append("EU");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("O") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("PATIEN") ) {
+ this.append("PAZI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("PENSIO") ) {
+ this.append("PANZI");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("PE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("PE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("PFER") ) {
+ this.append("FE");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("P") && "FH".indexOf(word.charAt(1)) != -1 ) {
+ this.append("F");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("POLY") ) {
+ this.append("PULI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 8 && word.substring(0, 8).equals("PORTRAIT") ) {
+ this.append("PURTRE");
+ this.word = this.word.substring(8);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("PP") && "FH".indexOf(word.charAt(2)) != -1 ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PP") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("PRIX") && word.length() == 4) {
+ this.append("PRI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 1).equals("P") && "SßZ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("PTI") && "AÄOÖUÜ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("PZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("PIC") && word.length() == 3) {
+ this.append("PIK");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("P") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("QUE") && "LMNRST".indexOf(word.charAt(3)) != -1 ) {
+ this.append("KFE");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("QUE") && word.length() == 3) {
+ this.append("K");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("QUI") && "NS".indexOf(word.charAt(3)) != -1 && word.length() == 4) {
+ this.append("KI");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("QU") ) {
+ this.append("KF");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Q") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("RCH") ) {
+ this.append("RK");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 8 && word.substring(0, 8).equals("RECHERCH") ) {
+ this.append("REZAZ");
+ this.word = this.word.substring(8);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("RER") && word.length() == 3) {
+ this.append("RA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("RE") && "MNR".indexOf(word.charAt(2)) != -1 ) {
+ this.append("RE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("RETTE") && word.length() == 5) {
+ this.append("RET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("RH") ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("RJA") && "MN".indexOf(word.charAt(3)) != -1 ) {
+ this.append("RI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("RTI") && "AÄOÖUÜ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("RZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("RY") && "KN".indexOf(word.charAt(2)) != -1 && word.length() == 3) {
+ this.append("RI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("R") ) {
+ this.append("R");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("SAFE") && word.length() == 4) {
+ this.append("ZEIF");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("SAUCE") ) {
+ this.append("ZUZ");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("SCHSCH") ) {
+ this.append("");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 7 && word.substring(0, 7).equals("SCHTSCH") ) {
+ this.append("Z");
+ this.word = this.word.substring(7);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("SC") && "HZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SC") ) {
+ this.append("ZK");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 8 && word.substring(0, 8).equals("SELBSTST") ) {
+ this.append("ZELP");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("SELBST") ) {
+ this.append("ZELPZT");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 7 && word.substring(0, 7).equals("SERVICE") ) {
+ this.append("ZÖRFIZ");
+ this.word = this.word.substring(7);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("SE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ZE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("SETTE") && word.length() == 5) {
+ this.append("ZET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("SHP") ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("SHST") ) {
+ this.append("ZT");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("SHTSH") ) {
+ this.append("Z");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("SHT") ) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SH") ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("SIEGLI") ) {
+ this.append("ZIKL");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("SIGLI") ) {
+ this.append("ZIKL");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("SIGHT") ) {
+ this.append("ZEIT");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("SIGN") ) {
+ this.append("ZEIN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("SKI") && "NPZ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ZKI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("SKI") ) {
+ this.append("ZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("SOUND") ) {
+ this.append("ZAUN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("STAATS") ) {
+ this.append("ZTAZ");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("STADT") ) {
+ this.append("ZTAT");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("START") ) {
+ this.append("ZTART");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 8 && word.substring(0, 8).equals("STAURANT") ) {
+ this.append("ZTURAN");
+ this.word = this.word.substring(8);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("STEAK") ) {
+ this.append("ZTE");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("STRAF") ) {
+ this.append("ZTRAF");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ST'S") && word.length() == 4) {
+ this.append("Z");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("STST") ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("STS") && "ACEHIOUÄÜÖ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ZT");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ST") && "SZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 4).equals("STYN") && "AE".indexOf(word.charAt(4)) != -1 && word.length() == 5) {
+ this.append("ZTIN");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ST") ) {
+ this.append("ZT");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("SZE") && "NPT".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ZE");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("SZI") && "ELN".indexOf(word.charAt(3)) != -1 ) {
+ this.append("ZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("SZCZ") ) {
+ this.append("Z");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("SZT") ) {
+ this.append("ZT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SZ") ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("T'S") && word.length() == 3) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("TCH") ) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("TEAT") ) {
+ this.append("TEA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("TE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("TE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TH") ) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("TIC") && word.length() == 3) {
+ this.append("TIZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("TOAS") ) {
+ this.append("TU");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("TOILET") ) {
+ this.append("TULE");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("TOIN") ) {
+ this.append("TUA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("TRAINI") ) {
+ this.append("TREN");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("TSCH") ) {
+ this.append("Z");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("TSH") ) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("TST") ) {
+ this.append("ZT");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("T") && "Sß".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("TT") && "SZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TT") ) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TZ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("T") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("UEBER") ) {
+ this.append("IPA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("UE") ) {
+ this.append("I");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("UIE") && word.length() == 3) {
+ this.append("I");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("UM") ) {
+ this.append("UN");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("UNTERE") ) {
+ this.append("UNTE");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("UNTER") ) {
+ this.append("UNTA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("UNVER") ) {
+ this.append("UNFA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("UN") ) {
+ this.append("UN");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("UTI") && "AÄOÖUÜ".indexOf(word.charAt(3)) != -1 ) {
+ this.append("UZI");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("U") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("VACL") ) {
+ this.append("FAZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("VAC") && word.length() == 3) {
+ this.append("FAZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("VEDD") ) {
+ this.append("FE");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("VEREIN") ) {
+ this.append("FAEIN");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("VERSEN") ) {
+ this.append("FAZN");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("VER") ) {
+ this.append("FA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("VER") ) {
+ this.append("FA");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("VET") && "HT".indexOf(word.charAt(3)) != -1 ) {
+ this.append("FET");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("VETTE") && word.length() == 5) {
+ this.append("FET");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("VIC") && word.length() == 3) {
+ this.append("FIZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("VIEL") ) {
+ this.append("FIL");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("VIEW") ) {
+ this.append("FIU");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 3).equals("VOR") ) {
+ this.append("FUR");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("VY") ) {
+ this.append("FI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("V") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("WE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("FE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("WIC") && word.length() == 3) {
+ this.append("FIZ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("WIEDER") ) {
+ this.append("FITA");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("WY") ) {
+ this.append("FI");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("W") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("XE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("XE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("X") && "CSZ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("XTS") && "CH".indexOf(word.charAt(3)) != -1 ) {
+ this.append("XT");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("XT") && "SZ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("Z");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("X");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("YE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("IE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("YE") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 3).equals("YOR") && "GK".indexOf(word.charAt(3)) != -1 && word.length() == 4) {
+ this.append("IÖRK");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Y") && "AOU".indexOf(word.charAt(1)) != -1 ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 4 && word.substring(0, 4).equals("YVES") && word.length() == 4) {
+ this.append("IF");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("YVONNE") && word.length() == 6) {
+ this.append("IFUN");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Y") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ZC") && "AOU".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ZK");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("ZE") && "LMNRST".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ZE");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ZH") ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ZS") && "CHT".indexOf(word.charAt(2)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ZS") ) {
+ this.append("Z");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 6 && word.substring(0, 6).equals("ZUERST") ) {
+ this.append("ZUERZT");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("ZURÜCK") ) {
+ this.append("ZURIK");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("ZUVER") ) {
+ this.append("ZUFA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Z") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneDe aphone = new AphoneDe();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneIs.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneIs.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneIs.java (revision 0)
@@ -0,0 +1,140 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @Author Mathieu Lecarme
+ */
+public class AphoneIs extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") ) {
+ this.append("Z");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("N") && word.length() == 1) {
+ this.append("NN");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("NN") && word.length() == 2) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Y") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("I") ) {
+ this.append("Y");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("L") ) {
+ this.append("LL");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("LL") ) {
+ this.append("L");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("FL") ) {
+ this.append("BL");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("LL") ) {
+ this.append("DL");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("RN") ) {
+ this.append("RDN");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("NGD") ) {
+ this.append("GND");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 5 && word.substring(0, 5).equals("TÖLVA") ) {
+ this.append("TALVA");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ý") ) {
+ this.append("Í");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Í") ) {
+ this.append("Ý");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneIs aphone = new AphoneIs();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/Aphone.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/Aphone.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/Aphone.java (revision 0)
@@ -0,0 +1,40 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Convert a word to its phonem with the aspell notation.
+ * @Author Mathieu Lecarme
+ * @see {http://en.wikipedia.org/wiki/Phonem}
+ * @see {http://aspell.net/}
+ */
+public abstract class Aphone {
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public abstract String toPhone(String word);
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(char[] word) {
+ return toPhone(new String(word));
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneEl.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneEl.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneEl.java (revision 0)
@@ -0,0 +1,485 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion for Greek, applied as an ordered cascade of prefix rules.
+ * @author Mathieu Lecarme
+ */
+public class AphoneEl extends Aphone{
+ private StringBuffer phone; // transcription built so far by append()
+ private String word; // upper-cased input that eat() has not consumed yet
+ private boolean starting; // true only during the first eat() call; not read anywhere in this class
+ // NOTE: toPhone() keeps its working state in the fields above, so one instance must not be used by two threads at once.
+ /**
+ * @param word the word to transcribe; it is upper-cased before the rules are applied
+ * @return phonetic transcription, or null when word is null
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+ /** Appends letter to phone, skipping its first character when that equals the last character already in phone. */
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+ /** Applies the first matching rule (order matters): appends its output and removes the consumed characters from the front of word. Returns false, dropping one character, when no rule matches. */
+ private boolean eat() {
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΒΒ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Β") ) {
+ this.append("Β");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΓΓ") ) {
+ this.append("ΓΚ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Γ") ) {
+ this.append("Γ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΔΔ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Δ") ) {
+ this.append("Δ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΖΖ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ζ") ) {
+ this.append("Ζ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΘΘ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Θ") ) {
+ this.append("Θ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΚΚ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΚΣ") ) {
+ this.append("Ξ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Κ") ) {
+ this.append("Κ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΛΛ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Λ") ) {
+ this.append("Λ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΜΜ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Μ") ) {
+ this.append("Μ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΝΝ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ν") ) {
+ this.append("Ν");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΠΠ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΠΣ") ) {
+ this.append("Ψ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Π") ) {
+ this.append("Π");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΡΡ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ρ") ) {
+ this.append("Ρ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΣΣ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Σ") ) {
+ this.append("Σ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΤΤ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Τ") ) {
+ this.append("Τ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΦΦ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Φ") ) {
+ this.append("Φ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΧΧ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Χ") ) {
+ this.append("Χ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΑΎ") ) {
+ this.append("ΑΥ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΑΥΝΤ") ) {
+ this.append("ΑΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΑΥΓΚ") ) {
+ this.append("ΑΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΑΥΤΖ") ) {
+ this.append("ΑΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ΑΥ") && "ΓΔΖΛΜΝΡ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ΑΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΑΥΒ") ) {
+ this.append("ΑΒ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ΑΥ") && "ΑΕΗΙΟΩΥΆΈΉΊΌΏΎ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ΑΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΑΥΦ") ) {
+ this.append("ΑΦ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΑΥΤΣ") ) {
+ this.append("ΑΦ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ΑΥ") && "ΘΚΠΣΤΧ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ΑΦ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΑΥΞ") ) {
+ this.append("ΑΦ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΑΥΨ") ) {
+ this.append("ΑΦ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Α") && "ΙΊ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Ε");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Α") ) {
+ this.append("Α");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΕΎ") ) {
+ this.append("Υ"); // NOTE(review): the parallel ΑΎ rule appends "ΑΥ"; confirm that "Υ" alone is intended here
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΕΥΝΤ") ) {
+ this.append("ΕΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΕΥΓΚ") ) {
+ this.append("ΕΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΕΥΤΖ") ) {
+ this.append("ΕΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ΕΥ") && "ΓΔΖΛΜΝΡ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ΕΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΕΥΒ") ) {
+ this.append("ΕΒ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ΕΥ") && "ΑΕΗΙΟΩΥΆΈΉΊΌΏΎ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ΕΒ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΕΥΦ") ) {
+ this.append("ΕΦ");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("ΕΥΤΣ") ) {
+ this.append(""); // NOTE(review): the parallel ΑΥΤΣ rule appends "ΑΦ"; confirm that emitting nothing for ΕΥΤΣ is intended
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ΕΥ") && "ΘΚΠΣΤΧ".indexOf(word.charAt(2)) != -1 ) {
+ this.append("ΕΦ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΕΥΞ") ) {
+ this.append("ΕΦ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ΕΥΨ") ) {
+ this.append("ΕΦ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ε") && "ΙΊ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Ι");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ε") ) {
+ this.append("Ε");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ο") && "ΙΊ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Ι");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Ο") && "ΥΎ".indexOf(word.charAt(1)) != -1 ) {
+ this.append("ΟΥ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ο") ) {
+ this.append("Ο");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ω") ) {
+ this.append("Ο");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Η") ) {
+ this.append("Ι");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΥΙ") ) {
+ this.append("Ι");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Υ") ) {
+ this.append("Ι");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ι") ) {
+ this.append("Ι");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΞΞ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΞΣ") ) {
+ this.append("Ξ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ξ") ) {
+ this.append("Ξ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΨΨ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ΨΣ") ) {
+ this.append("Ψ");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ψ") ) {
+ this.append("Ψ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ϊ") ) {
+ this.append("Ι");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ϋ") ) {
+ this.append("Ι");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ά") ) {
+ this.append("Α");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ό") ) {
+ this.append("Ο");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ί") ) {
+ this.append("Ι");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Έ") ) {
+ this.append("Ε");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ύ") ) {
+ this.append("Υ");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ώ") ) {
+ this.append("Ο");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ή") ) {
+ this.append("Ι");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ // No rule matched: drop the character without emitting anything.
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test: prints the transcription of each command-line argument.
+ */
+ public static void main(String[] args) {
+ AphoneEl aphone = new AphoneEl();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/Homophone.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/Homophone.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/Homophone.java (revision 0)
@@ -0,0 +1,104 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.search.spell.PlainTextDictionary;
+
+/**
+ * Homophone classification of a list of words: words are grouped by phonetic transcription.
+ * @see <a href="http://en.wikipedia.org/wiki/Homophone">Homophone</a>
+ * @author Mathieu Lecarme
+ */
+public class Homophone {
+  private Aphone aphone;
+  private Map dict = new TreeMap(); // phonetic transcription -> Set of the words sharing it
+  public void setAphone(Aphone a) {
+    aphone = a;
+  }
+
+  /**
+   * Initialisation with a specific language
+   */
+  public Homophone(Aphone aphone) {
+    setAphone(aphone);
+  }
+
+  /**
+   * Add a word for sorting; the word and its transcription are also traced on standard output
+   */
+  public void addWord(String word) {
+    String phonem = aphone.toPhone(word);
+    System.out.println(word + " -> " + phonem);
+    if(! dict.containsKey(phonem)) {
+      dict.put(phonem, new HashSet());
+    }
+    ((Set)dict.get(phonem)).add(word);
+  }
+
+  /**
+   * Add a full dictionary
+   */
+  public void read(Dictionary dico) {
+    Iterator iter = dico.getWordsIterator();
+    while(iter.hasNext())
+      addWord((String)iter.next());
+  }
+
+  /**
+   * Show what is inside
+   */
+  public void dump() {
+    Iterator iter = dict.entrySet().iterator();
+    while(iter.hasNext()) {
+      Entry line = (Entry)iter.next();
+      System.out.println(line.getKey());
+      System.out.print(line.getValue());
+      System.out.print("\n");
+    }
+  }
+
+  /** Simple test. Usage: Homophone LANGUAGE DICTIONARY_FILE; bad arguments or an unreadable file are reported on stderr. */
+  public static void main(String[] args) {
+    Map languages = new HashMap();
+    languages.put("fr", new AphoneFr());
+    languages.put("en", new AphoneEn());
+    languages.put("el", new AphoneEl());
+    if (args.length < 2 || !languages.containsKey(args[0])) {
+      System.err.println("Usage: Homophone <fr|en|el> <dictionary file>");
+      return;
+    }
+    Homophone homophone = new Homophone((Aphone)languages.get(args[0]));
+    try {
+      homophone.read(new PlainTextDictionary(new File(args[1])));
+      homophone.dump();
+    } catch (FileNotFoundException e) {
+      System.err.println("Cannot open dictionary " + args[1] + ": " + e.getMessage());
+    }
+  }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneEn.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneEn.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneEn.java (revision 0)
@@ -0,0 +1,595 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @author Mathieu Lecarme
+ */
+public class AphoneEn extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("AH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("AR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 1).equals("A") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("*");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("A") ) {
+ this.append("*");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("A") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("BB") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("B") ) {
+ this.append("B");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CQ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("CIA") ) {
+ this.append("X");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CH") ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("C") && "EIY".indexOf(word.charAt(1)) != -1 ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CK") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("COUGH") ) {
+ this.append("KF");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CC") ) {
+ this.append("C");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("C") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("DG") && "EIY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("K");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("DD") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("D") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("É") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("EH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("ER") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 1).equals("E") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("*");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 6 && word.substring(0, 6).equals("ENOUGH") && word.length() == 6) {
+ this.append("*NF");
+ this.word = this.word.substring(6);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("E") ) {
+ this.append("*");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("ER") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("E") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("FF") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("F") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("GN") ) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("GN") && word.length() == 2) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("GNS") && word.length() == 3) {
+ this.append("NS");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 4).equals("GNED") && word.length() == 4) {
+ this.append("N");
+ this.word = this.word.substring(4);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("GH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("GH") ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("GG") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("G") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("H") ) {
+ this.append("H");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("IH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("IR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 1).equals("I") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("*");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("I") ) {
+ this.append("*");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("ING") ) {
+ this.append("N");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("IH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("IR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("I") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("J") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("KN") ) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("KK") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("K") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("LAUGH") ) {
+ this.append("LF");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("LL") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("L") ) {
+ this.append("L");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("MB") && word.length() == 2) {
+ this.append("M");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("MM") ) {
+ this.append("M");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("M") ) {
+ this.append("M");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("NN") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("N") ) {
+ this.append("N");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("OH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("OR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 1).equals("O") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("*");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("O") ) {
+ this.append("*");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("OH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("OR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("O") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PH") ) {
+ this.append("F");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("PN") ) {
+ this.append("N");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PP") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("P") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Q") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("RH") ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("ROUGH") ) {
+ this.append("RF");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("RR") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("R") ) {
+ this.append("R");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 4 && word.substring(0, 3).equals("SCH") && "EOU".indexOf(word.charAt(3)) != -1 ) {
+ this.append("SK");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("SC") && "IEY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("S");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SH") ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("SI") && "AO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SS") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("TI") && "AO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TH") ) {
+ this.append("@");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("TCH") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 5 && word.substring(0, 5).equals("TOUGH") ) {
+ this.append("TF");
+ this.word = this.word.substring(5);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TT") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("T") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("UH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 3 && word.substring(0, 2).equals("UR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("*R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 1).equals("U") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("*");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("U") ) {
+ this.append("*");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("UH") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("H");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("UR") && "AEIOUY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("U") && "HR".indexOf(word.charAt(1)) != -1 ) {
+ this.append("");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("V") ) {
+ this.append("W");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("V") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("WR") ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( this.starting && word.length() >= 2 && word.substring(0, 2).equals("WH") ) {
+ this.append("W");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("W") && "AEIOU".indexOf(word.charAt(1)) != -1 ) {
+ this.append("W");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( this.starting && word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("KS");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Y") && "AEIOU".indexOf(word.charAt(1)) != -1 ) {
+ this.append("Y");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ZZ") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Z") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneEn aphone = new AphoneEn();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/src/java/org/apache/lucene/aphone/AphoneBr.java
===================================================================
--- contrib/aphone/src/java/org/apache/lucene/aphone/AphoneBr.java (revision 0)
+++ contrib/aphone/src/java/org/apache/lucene/aphone/AphoneBr.java (revision 0)
@@ -0,0 +1,465 @@
+package org.apache.lucene.aphone;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * String to phone conversion
+ * @Author Mathieu Lecarme
+ */
+public class AphoneBr extends Aphone{
+ private StringBuffer phone;
+ private String word;
+ private boolean starting;
+
+ /**
+ * @param a word
+ * @return phonetic transcription
+ */
+ public String toPhone(String word) {
+ if(word == null)
+ return null;
+ this.phone = new StringBuffer();
+ this.word = word.toUpperCase();
+ this.starting = true;
+ while( this.word.length() > 0 ) {
+ this.eat();
+ this.starting = false;
+ }
+ return this.phone.toString();
+ }
+
+ private void append(String letter) {
+ if (letter.length() > 0) {
+ if (this.phone.length() > 0 && letter.charAt(0) == this.phone.charAt(this.phone.length() - 1)) {
+ if (letter.length() > 1)
+ this.phone.append(letter.substring(1));
+ } else
+ this.phone.append(letter);
+ }
+ }
+
+ private boolean eat() {
+ if( word.length() >= 2 && word.substring(0, 2).equals("AI") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AN") && "AEUIO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("AM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AN") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("AMM") ) {
+ this.append("AM");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("AM") && "AEUIO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("AM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AM") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("AU") ) {
+ this.append("O");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("A") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Â") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("À") ) {
+ this.append("A");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("BB") ) {
+ this.append("P");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("B") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ç") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("C") && "EI".indexOf(word.charAt(1)) != -1 ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CU") && "EI".indexOf(word.charAt(2)) != -1 ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("CC") && "EI".indexOf(word.charAt(2)) != -1 ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CC") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("CH") ) {
+ this.append("CH");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("C") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("DD") ) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("D") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 8 && word.substring(0, 8).equals("EMMENTAL") ) {
+ this.append("EMATAL");
+ this.word = this.word.substring(8);
+ return true;
+ }
+ if( word.length() >= 9 && word.substring(0, 9).equals("EMMENTHAL") ) {
+ this.append("EMATAL");
+ this.word = this.word.substring(9);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EM") && "AEIOU".indexOf(word.charAt(2)) != -1 ) {
+ this.append("EM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EM") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ET") && word.length() == 2) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EU") ) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("EN") && "AEUIO".indexOf(word.charAt(2)) != -1 ) {
+ this.append("EM");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EN") ) {
+ this.append("A");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ER") && word.length() == 2) {
+ this.append("E");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("EO") ) {
+ this.append("O");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("EAU") ) {
+ this.append("O");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("E") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("È") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("É") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ê") ) {
+ this.append("E");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("F") ) {
+ this.append("F");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("G") && "EIY".indexOf(word.charAt(1)) != -1 ) {
+ this.append("J");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 2).equals("GU") && "EIY".indexOf(word.charAt(2)) != -1 ) {
+ this.append("G");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("G") ) {
+ this.append("G");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("H") ) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("I") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Î") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("J") ) {
+ this.append("J");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("KS") ) {
+ this.append("X");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("K") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("LL") ) {
+ this.append("L");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("L") ) {
+ this.append("L");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("MM") ) {
+ this.append("M");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("M") ) {
+ this.append("M");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("NN") ) {
+ this.append("M");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("N") ) {
+ this.append("M");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 3 && word.substring(0, 3).equals("OEU") ) {
+ this.append("E");
+ this.word = this.word.substring(3);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("OU") ) {
+ this.append("U");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("OÙ") ) {
+ this.append("U");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("O") ) {
+ this.append("O");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ô") ) {
+ this.append("O");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PP") ) {
+ this.append("P");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("PH") ) {
+ this.append("F");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("P") ) {
+ this.append("P");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("QU") ) {
+ this.append("K");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Q") ) {
+ this.append("K");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("RR") ) {
+ this.append("R");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("R") ) {
+ this.append("R");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") && word.length() == 1) {
+ this.append("");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("SS") ) {
+ this.append("S");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("S") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("TT") ) {
+ this.append("T");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("T") ) {
+ this.append("T");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("U") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Ù") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Û") ) {
+ this.append("U");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("V") ) {
+ this.append("V");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("W") ) {
+ this.append("W");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("X") ) {
+ this.append("X");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 1).equals("Y") && "AEOU".indexOf(word.charAt(1)) != -1 ) {
+ this.append("IL");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Y") ) {
+ this.append("I");
+ this.word = this.word.substring(1);
+ return true;
+ }
+ if( word.length() >= 2 && word.substring(0, 2).equals("ZZ") ) {
+ this.append("S");
+ this.word = this.word.substring(2);
+ return true;
+ }
+ if( word.length() >= 1 && word.substring(0, 1).equals("Z") ) {
+ this.append("S");
+ this.word = this.word.substring(1);
+ return true;
+ }
+
+ this.word = this.word.substring(1);
+ return false;
+ }
+ /**
+ * Simple test
+ */
+ public static void main(String[] args) {
+ AphoneBr aphone = new AphoneBr();
+ for(int i = 0; i < args.length; i++) {
+ System.out.println(aphone.toPhone(args[i]));
+ }
+ }
+}
Index: contrib/aphone/pom.xml
===================================================================
Index: contrib/aphone/build.xml
===================================================================
--- contrib/aphone/build.xml (revision 0)
+++ contrib/aphone/build.xml (revision 0)
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+
+ Aphone
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ XML Parser building dependency ${spellchecker.jar}
+
+
+
+
+
+
+ Must specify 'list' property.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+