Index: src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java =================================================================== --- src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (revision 0) +++ src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (revision 0) @@ -0,0 +1,1901 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; + +import java.io.StringReader; +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; + +public class TestASCIIFoldingFilter extends LuceneTestCase { + + // testLatin1Accents() is a copy of TestLatin1AccentFilter.testU(). 
+  /**
+   * Feeds a whitespace-separated sample of Latin-1 accented letters, plus the
+   * IJ/ij and fi/fl ligatures, through ASCIIFoldingFilter and checks that each
+   * token comes out as its expected ASCII folding, and that the stream is
+   * exhausted afterwards.
+   *
+   * The four ligatures (U+0132, U+0133, U+FB01, U+FB02) are spelled as
+   * Unicode escapes on purpose: a compatibility-normalizing editor or
+   * transport turns the literal characters into plain ASCII "IJ", "ij", "fi"
+   * and "fl", after which the test would still pass without ever exercising
+   * the ligature foldings.
+   */
+  public void testLatin1Accents() throws Exception {
+    TokenStream stream = new WhitespaceTokenizer(new StringReader
+      ("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï \u0132 Ð Ñ"
+      +" Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï \u0133"
+      +" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ \uFB01 \uFB02"));
+    ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
+
+    // Expected folded terms, in the same order as the input tokens above.
+    String[] expected = {
+      "Des", "mot", "cles", "A", "LA", "CHAINE",
+      "A", "A", "A", "A", "A", "A",
+      "AE", "C",
+      "E", "E", "E", "E",
+      "I", "I", "I", "I",
+      "IJ", "D", "N",
+      "O", "O", "O", "O", "O", "O",
+      "OE", "TH",
+      "U", "U", "U", "U",
+      "Y", "Y",
+      "a", "a", "a", "a", "a", "a",
+      "ae", "c",
+      "e", "e", "e", "e",
+      "i", "i", "i", "i",
+      "ij", "d", "n",
+      "o", "o", "o", "o", "o", "o",
+      "oe", "ss", "th",
+      "u", "u", "u", "u",
+      "y", "y",
+      "fi", "fl"
+    };
+
+    final Token reusableToken = new Token();
+
+    // The index in the message pinpoints which input token was mis-folded.
+    for (int i = 0; i < expected.length; i++) {
+      assertEquals("token #" + i, expected[i], filter.next(reusableToken).term());
+    }
+    assertNull(filter.next(reusableToken));
+  }
+
+
+  // The following Perl script generated the foldings[] array automatically
+  // from ASCIIFoldingFilter.java:
+  //
+  //    ============== begin get.test.cases.pl ==============
+  //
+  //    use strict;
+  //    use warnings;
+  //
+  //    my $file = "ASCIIFoldingFilter.java";
+  //    my $output = "testcases.txt";
+  //    my %codes = ();
+  //    my $folded = '';
+  //
+  //    open IN, "<:utf8", $file || die "Error opening input file '$file': $!";
+  //    open OUT, ">:utf8", $output || die "Error opening output file '$output': $!";
+  //
+  //    while (my $line = <IN>) {
+  //      chomp($line);
+  //      # case '\u0133': // [ description ]
+  //      if ($line =~ /case\s+'\\u(....)':.*\[([^\]]+)\]/) {
+  //        my $code = $1;
+  //        my $desc = $2;
+  //        $codes{$code} = $desc;
+  //      }
+  //      # output[outputPos++] = 'A';
+  //      elsif ($line =~ /output\[outputPos\+\+\] = '(.+)';/) {
+  //        my $output_char = $1;
+  //        $folded .= $output_char;
+  //      }
+  //      elsif ($line =~ /break;/ && length($folded) > 0) {
+  //        my $first = 1;
+  //        for my $code (sort { hex($a) <=> hex($b) } keys %codes) {
+  //          my $desc = $codes{$code};
+  //          print OUT ' ';
+  //          print OUT '+ ' if (not $first);
+  //          $first = 0;
+  //          print OUT '"', chr(hex($code)), qq!" // U+$code: $desc\n!;
+  //        }
+  //        print OUT qq! 
,"$folded", // Folded result\n\n!; + // %codes = (); + // $folded = ''; + // } + // } + // close OUT; + // + // ============== end get.test.cases.pl ============== + // + public void testAllFoldings() throws Exception { + // Alternating strings of: + // 1. All non-ASCII characters to be folded, concatenated together as a + // single string. + // 2. The string of ASCII characters to which each of the above + // characters should be folded. + String[] foldings = { + "À" // U+00C0: LATIN CAPITAL LETTER A WITH GRAVE + + "Á" // U+00C1: LATIN CAPITAL LETTER A WITH ACUTE + + "Â" // U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX + + "Ã" // U+00C3: LATIN CAPITAL LETTER A WITH TILDE + + "Ä" // U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS + + "Å" // U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE + + "Ā" // U+0100: LATIN CAPITAL LETTER A WITH MACRON + + "Ă" // U+0102: LATIN CAPITAL LETTER A WITH BREVE + + "Ą" // U+0104: LATIN CAPITAL LETTER A WITH OGONEK + + "Ə" // U+018F: LATIN CAPITAL LETTER SCHWA + + "Ǎ" // U+01CD: LATIN CAPITAL LETTER A WITH CARON + + "Ǟ" // U+01DE: LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + + "Ǡ" // U+01E0: LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + + "Ǻ" // U+01FA: LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + + "Ȁ" // U+0200: LATIN CAPITAL LETTER A WITH DOUBLE GRAVE + + "Ȃ" // U+0202: LATIN CAPITAL LETTER A WITH INVERTED BREVE + + "Ȧ" // U+0226: LATIN CAPITAL LETTER A WITH DOT ABOVE + + "Ⱥ" // U+023A: LATIN CAPITAL LETTER A WITH STROKE + + "ᴀ" // U+1D00: LATIN LETTER SMALL CAPITAL A + + "Ḁ" // U+1E00: LATIN CAPITAL LETTER A WITH RING BELOW + + "Ạ" // U+1EA0: LATIN CAPITAL LETTER A WITH DOT BELOW + + "Ả" // U+1EA2: LATIN CAPITAL LETTER A WITH HOOK ABOVE + + "Ấ" // U+1EA4: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + + "Ầ" // U+1EA6: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + + "Ẩ" // U+1EA8: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + + "Ẫ" // U+1EAA: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + + "Ậ" 
// U+1EAC: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + + "Ắ" // U+1EAE: LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + + "Ằ" // U+1EB0: LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + + "Ẳ" // U+1EB2: LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + + "Ẵ" // U+1EB4: LATIN CAPITAL LETTER A WITH BREVE AND TILDE + + "Ặ" // U+1EB6: LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + + "Ⓐ" // U+24B6: CIRCLED LATIN CAPITAL LETTER A + + "A" // U+FF21: FULLWIDTH LATIN CAPITAL LETTER A + ,"A", // Folded result + + "à" // U+00E0: LATIN SMALL LETTER A WITH GRAVE + + "á" // U+00E1: LATIN SMALL LETTER A WITH ACUTE + + "â" // U+00E2: LATIN SMALL LETTER A WITH CIRCUMFLEX + + "ã" // U+00E3: LATIN SMALL LETTER A WITH TILDE + + "ä" // U+00E4: LATIN SMALL LETTER A WITH DIAERESIS + + "å" // U+00E5: LATIN SMALL LETTER A WITH RING ABOVE + + "ā" // U+0101: LATIN SMALL LETTER A WITH MACRON + + "ă" // U+0103: LATIN SMALL LETTER A WITH BREVE + + "ą" // U+0105: LATIN SMALL LETTER A WITH OGONEK + + "ǎ" // U+01CE: LATIN SMALL LETTER A WITH CARON + + "ǟ" // U+01DF: LATIN SMALL LETTER A WITH DIAERESIS AND MACRON + + "ǡ" // U+01E1: LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON + + "ǻ" // U+01FB: LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE + + "ȁ" // U+0201: LATIN SMALL LETTER A WITH DOUBLE GRAVE + + "ȃ" // U+0203: LATIN SMALL LETTER A WITH INVERTED BREVE + + "ȧ" // U+0227: LATIN SMALL LETTER A WITH DOT ABOVE + + "ɐ" // U+0250: LATIN SMALL LETTER TURNED A + + "ə" // U+0259: LATIN SMALL LETTER SCHWA + + "ɚ" // U+025A: LATIN SMALL LETTER SCHWA WITH HOOK + + "ᶏ" // U+1D8F: LATIN SMALL LETTER A WITH RETROFLEX HOOK + + "ḁ" // U+1E01: LATIN SMALL LETTER A WITH RING BELOW + + "ᶕ" // U+1D95: LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK + + "ẚ" // U+1E9A: LATIN SMALL LETTER A WITH RIGHT HALF RING + + "ạ" // U+1EA1: LATIN SMALL LETTER A WITH DOT BELOW + + "ả" // U+1EA3: LATIN SMALL LETTER A WITH HOOK ABOVE + + "ấ" // U+1EA5: LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE + + "ầ" // U+1EA7: LATIN 
SMALL LETTER A WITH CIRCUMFLEX AND GRAVE + + "ẩ" // U+1EA9: LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + + "ẫ" // U+1EAB: LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE + + "ậ" // U+1EAD: LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW + + "ắ" // U+1EAF: LATIN SMALL LETTER A WITH BREVE AND ACUTE + + "ằ" // U+1EB1: LATIN SMALL LETTER A WITH BREVE AND GRAVE + + "ẳ" // U+1EB3: LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE + + "ẵ" // U+1EB5: LATIN SMALL LETTER A WITH BREVE AND TILDE + + "ặ" // U+1EB7: LATIN SMALL LETTER A WITH BREVE AND DOT BELOW + + "ₐ" // U+2090: LATIN SUBSCRIPT SMALL LETTER A + + "ₔ" // U+2094: LATIN SUBSCRIPT SMALL LETTER SCHWA + + "ⓐ" // U+24D0: CIRCLED LATIN SMALL LETTER A + + "ⱥ" // U+2C65: LATIN SMALL LETTER A WITH STROKE + + "Ɐ" // U+2C6F: LATIN CAPITAL LETTER TURNED A + + "a" // U+FF41: FULLWIDTH LATIN SMALL LETTER A + ,"a", // Folded result + + "Ꜳ" // U+A732: LATIN CAPITAL LETTER AA + ,"AA", // Folded result + + "Æ" // U+00C6: LATIN CAPITAL LETTER AE + + "Ǣ" // U+01E2: LATIN CAPITAL LETTER AE WITH MACRON + + "Ǽ" // U+01FC: LATIN CAPITAL LETTER AE WITH ACUTE + + "ᴁ" // U+1D01: LATIN LETTER SMALL CAPITAL AE + ,"AE", // Folded result + + "Ꜵ" // U+A734: LATIN CAPITAL LETTER AO + ,"AO", // Folded result + + "Ꜷ" // U+A736: LATIN CAPITAL LETTER AU + ,"AU", // Folded result + + "Ꜹ" // U+A738: LATIN CAPITAL LETTER AV + + "Ꜻ" // U+A73A: LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR + ,"AV", // Folded result + + "Ꜽ" // U+A73C: LATIN CAPITAL LETTER AY + ,"AY", // Folded result + + "⒜" // U+249C: PARENTHESIZED LATIN SMALL LETTER A + ,"(a)", // Folded result + + "ꜳ" // U+A733: LATIN SMALL LETTER AA + ,"aa", // Folded result + + "æ" // U+00E6: LATIN SMALL LETTER AE + + "ǣ" // U+01E3: LATIN SMALL LETTER AE WITH MACRON + + "ǽ" // U+01FD: LATIN SMALL LETTER AE WITH ACUTE + + "ᴂ" // U+1D02: LATIN SMALL LETTER TURNED AE + ,"ae", // Folded result + + "ꜵ" // U+A735: LATIN SMALL LETTER AO + ,"ao", // Folded result + + "ꜷ" // U+A737: LATIN SMALL LETTER 
AU + ,"au", // Folded result + + "ꜹ" // U+A739: LATIN SMALL LETTER AV + + "ꜻ" // U+A73B: LATIN SMALL LETTER AV WITH HORIZONTAL BAR + ,"av", // Folded result + + "ꜽ" // U+A73D: LATIN SMALL LETTER AY + ,"ay", // Folded result + + "Ɓ" // U+0181: LATIN CAPITAL LETTER B WITH HOOK + + "Ƃ" // U+0182: LATIN CAPITAL LETTER B WITH TOPBAR + + "Ƀ" // U+0243: LATIN CAPITAL LETTER B WITH STROKE + + "ʙ" // U+0299: LATIN LETTER SMALL CAPITAL B + + "ᴃ" // U+1D03: LATIN LETTER SMALL CAPITAL BARRED B + + "Ḃ" // U+1E02: LATIN CAPITAL LETTER B WITH DOT ABOVE + + "Ḅ" // U+1E04: LATIN CAPITAL LETTER B WITH DOT BELOW + + "Ḇ" // U+1E06: LATIN CAPITAL LETTER B WITH LINE BELOW + + "Ⓑ" // U+24B7: CIRCLED LATIN CAPITAL LETTER B + + "B" // U+FF22: FULLWIDTH LATIN CAPITAL LETTER B + ,"B", // Folded result + + "ƀ" // U+0180: LATIN SMALL LETTER B WITH STROKE + + "ƃ" // U+0183: LATIN SMALL LETTER B WITH TOPBAR + + "ɓ" // U+0253: LATIN SMALL LETTER B WITH HOOK + + "ᵬ" // U+1D6C: LATIN SMALL LETTER B WITH MIDDLE TILDE + + "ᶀ" // U+1D80: LATIN SMALL LETTER B WITH PALATAL HOOK + + "ḃ" // U+1E03: LATIN SMALL LETTER B WITH DOT ABOVE + + "ḅ" // U+1E05: LATIN SMALL LETTER B WITH DOT BELOW + + "ḇ" // U+1E07: LATIN SMALL LETTER B WITH LINE BELOW + + "ⓑ" // U+24D1: CIRCLED LATIN SMALL LETTER B + + "b" // U+FF42: FULLWIDTH LATIN SMALL LETTER B + ,"b", // Folded result + + "⒝" // U+249D: PARENTHESIZED LATIN SMALL LETTER B + ,"(b)", // Folded result + + "Ç" // U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA + + "Ć" // U+0106: LATIN CAPITAL LETTER C WITH ACUTE + + "Ĉ" // U+0108: LATIN CAPITAL LETTER C WITH CIRCUMFLEX + + "Ċ" // U+010A: LATIN CAPITAL LETTER C WITH DOT ABOVE + + "Č" // U+010C: LATIN CAPITAL LETTER C WITH CARON + + "Ƈ" // U+0187: LATIN CAPITAL LETTER C WITH HOOK + + "Ȼ" // U+023B: LATIN CAPITAL LETTER C WITH STROKE + + "ʗ" // U+0297: LATIN LETTER STRETCHED C + + "ᴄ" // U+1D04: LATIN LETTER SMALL CAPITAL C + + "Ḉ" // U+1E08: LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + + "Ⓒ" // U+24B8: CIRCLED LATIN 
CAPITAL LETTER C + + "C" // U+FF23: FULLWIDTH LATIN CAPITAL LETTER C + ,"C", // Folded result + + "ç" // U+00E7: LATIN SMALL LETTER C WITH CEDILLA + + "ć" // U+0107: LATIN SMALL LETTER C WITH ACUTE + + "ĉ" // U+0109: LATIN SMALL LETTER C WITH CIRCUMFLEX + + "ċ" // U+010B: LATIN SMALL LETTER C WITH DOT ABOVE + + "č" // U+010D: LATIN SMALL LETTER C WITH CARON + + "ƈ" // U+0188: LATIN SMALL LETTER C WITH HOOK + + "ȼ" // U+023C: LATIN SMALL LETTER C WITH STROKE + + "ɕ" // U+0255: LATIN SMALL LETTER C WITH CURL + + "ḉ" // U+1E09: LATIN SMALL LETTER C WITH CEDILLA AND ACUTE + + "ↄ" // U+2184: LATIN SMALL LETTER REVERSED C + + "ⓒ" // U+24D2: CIRCLED LATIN SMALL LETTER C + + "Ꜿ" // U+A73E: LATIN CAPITAL LETTER REVERSED C WITH DOT + + "ꜿ" // U+A73F: LATIN SMALL LETTER REVERSED C WITH DOT + + "c" // U+FF43: FULLWIDTH LATIN SMALL LETTER C + ,"c", // Folded result + + "⒞" // U+249E: PARENTHESIZED LATIN SMALL LETTER C + ,"(c)", // Folded result + + "Ð" // U+00D0: LATIN CAPITAL LETTER ETH + + "Ď" // U+010E: LATIN CAPITAL LETTER D WITH CARON + + "Đ" // U+0110: LATIN CAPITAL LETTER D WITH STROKE + + "Ɖ" // U+0189: LATIN CAPITAL LETTER AFRICAN D + + "Ɗ" // U+018A: LATIN CAPITAL LETTER D WITH HOOK + + "Ƌ" // U+018B: LATIN CAPITAL LETTER D WITH TOPBAR + + "ᴅ" // U+1D05: LATIN LETTER SMALL CAPITAL D + + "ᴆ" // U+1D06: LATIN LETTER SMALL CAPITAL ETH + + "Ḋ" // U+1E0A: LATIN CAPITAL LETTER D WITH DOT ABOVE + + "Ḍ" // U+1E0C: LATIN CAPITAL LETTER D WITH DOT BELOW + + "Ḏ" // U+1E0E: LATIN CAPITAL LETTER D WITH LINE BELOW + + "Ḑ" // U+1E10: LATIN CAPITAL LETTER D WITH CEDILLA + + "Ḓ" // U+1E12: LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + + "Ⓓ" // U+24B9: CIRCLED LATIN CAPITAL LETTER D + + "Ꝺ" // U+A779: LATIN CAPITAL LETTER INSULAR D + + "D" // U+FF24: FULLWIDTH LATIN CAPITAL LETTER D + ,"D", // Folded result + + "ð" // U+00F0: LATIN SMALL LETTER ETH + + "ď" // U+010F: LATIN SMALL LETTER D WITH CARON + + "đ" // U+0111: LATIN SMALL LETTER D WITH STROKE + + "ƌ" // U+018C: LATIN SMALL 
LETTER D WITH TOPBAR + + "ȡ" // U+0221: LATIN SMALL LETTER D WITH CURL + + "ɖ" // U+0256: LATIN SMALL LETTER D WITH TAIL + + "ɗ" // U+0257: LATIN SMALL LETTER D WITH HOOK + + "ᵭ" // U+1D6D: LATIN SMALL LETTER D WITH MIDDLE TILDE + + "ᶁ" // U+1D81: LATIN SMALL LETTER D WITH PALATAL HOOK + + "ᶑ" // U+1D91: LATIN SMALL LETTER D WITH HOOK AND TAIL + + "ḋ" // U+1E0B: LATIN SMALL LETTER D WITH DOT ABOVE + + "ḍ" // U+1E0D: LATIN SMALL LETTER D WITH DOT BELOW + + "ḏ" // U+1E0F: LATIN SMALL LETTER D WITH LINE BELOW + + "ḑ" // U+1E11: LATIN SMALL LETTER D WITH CEDILLA + + "ḓ" // U+1E13: LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW + + "ⓓ" // U+24D3: CIRCLED LATIN SMALL LETTER D + + "ꝺ" // U+A77A: LATIN SMALL LETTER INSULAR D + + "d" // U+FF44: FULLWIDTH LATIN SMALL LETTER D + ,"d", // Folded result + + "DŽ" // U+01C4: LATIN CAPITAL LETTER DZ WITH CARON + + "DZ" // U+01F1: LATIN CAPITAL LETTER DZ + ,"DZ", // Folded result + + "Dž" // U+01C5: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + + "Dz" // U+01F2: LATIN CAPITAL LETTER D WITH SMALL LETTER Z + ,"Dz", // Folded result + + "⒟" // U+249F: PARENTHESIZED LATIN SMALL LETTER D + ,"(d)", // Folded result + + "ȸ" // U+0238: LATIN SMALL LETTER DB DIGRAPH + ,"db", // Folded result + + "dž" // U+01C6: LATIN SMALL LETTER DZ WITH CARON + + "dz" // U+01F3: LATIN SMALL LETTER DZ + + "ʣ" // U+02A3: LATIN SMALL LETTER DZ DIGRAPH + + "ʥ" // U+02A5: LATIN SMALL LETTER DZ DIGRAPH WITH CURL + ,"dz", // Folded result + + "È" // U+00C8: LATIN CAPITAL LETTER E WITH GRAVE + + "É" // U+00C9: LATIN CAPITAL LETTER E WITH ACUTE + + "Ê" // U+00CA: LATIN CAPITAL LETTER E WITH CIRCUMFLEX + + "Ë" // U+00CB: LATIN CAPITAL LETTER E WITH DIAERESIS + + "Ē" // U+0112: LATIN CAPITAL LETTER E WITH MACRON + + "Ĕ" // U+0114: LATIN CAPITAL LETTER E WITH BREVE + + "Ė" // U+0116: LATIN CAPITAL LETTER E WITH DOT ABOVE + + "Ę" // U+0118: LATIN CAPITAL LETTER E WITH OGONEK + + "Ě" // U+011A: LATIN CAPITAL LETTER E WITH CARON + + "Ǝ" // U+018E: LATIN CAPITAL 
LETTER REVERSED E + + "Ɛ" // U+0190: LATIN CAPITAL LETTER OPEN E + + "Ȅ" // U+0204: LATIN CAPITAL LETTER E WITH DOUBLE GRAVE + + "Ȇ" // U+0206: LATIN CAPITAL LETTER E WITH INVERTED BREVE + + "Ȩ" // U+0228: LATIN CAPITAL LETTER E WITH CEDILLA + + "Ɇ" // U+0246: LATIN CAPITAL LETTER E WITH STROKE + + "ᴇ" // U+1D07: LATIN LETTER SMALL CAPITAL E + + "Ḕ" // U+1E14: LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + + "Ḗ" // U+1E16: LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + + "Ḙ" // U+1E18: LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + + "Ḛ" // U+1E1A: LATIN CAPITAL LETTER E WITH TILDE BELOW + + "Ḝ" // U+1E1C: LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + + "Ẹ" // U+1EB8: LATIN CAPITAL LETTER E WITH DOT BELOW + + "Ẻ" // U+1EBA: LATIN CAPITAL LETTER E WITH HOOK ABOVE + + "Ẽ" // U+1EBC: LATIN CAPITAL LETTER E WITH TILDE + + "Ế" // U+1EBE: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + + "Ề" // U+1EC0: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + + "Ể" // U+1EC2: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + + "Ễ" // U+1EC4: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + + "Ệ" // U+1EC6: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + + "Ⓔ" // U+24BA: CIRCLED LATIN CAPITAL LETTER E + + "ⱻ" // U+2C7B: LATIN LETTER SMALL CAPITAL TURNED E + + "E" // U+FF25: FULLWIDTH LATIN CAPITAL LETTER E + ,"E", // Folded result + + "è" // U+00E8: LATIN SMALL LETTER E WITH GRAVE + + "é" // U+00E9: LATIN SMALL LETTER E WITH ACUTE + + "ê" // U+00EA: LATIN SMALL LETTER E WITH CIRCUMFLEX + + "ë" // U+00EB: LATIN SMALL LETTER E WITH DIAERESIS + + "ē" // U+0113: LATIN SMALL LETTER E WITH MACRON + + "ĕ" // U+0115: LATIN SMALL LETTER E WITH BREVE + + "ė" // U+0117: LATIN SMALL LETTER E WITH DOT ABOVE + + "ę" // U+0119: LATIN SMALL LETTER E WITH OGONEK + + "ě" // U+011B: LATIN SMALL LETTER E WITH CARON + + "ǝ" // U+01DD: LATIN SMALL LETTER TURNED E + + "ȅ" // U+0205: LATIN SMALL LETTER E WITH DOUBLE GRAVE + + "ȇ" // U+0207: LATIN SMALL LETTER E WITH INVERTED BREVE + 
+ "ȩ" // U+0229: LATIN SMALL LETTER E WITH CEDILLA + + "ɇ" // U+0247: LATIN SMALL LETTER E WITH STROKE + + "ɘ" // U+0258: LATIN SMALL LETTER REVERSED E + + "ɛ" // U+025B: LATIN SMALL LETTER OPEN E + + "ɜ" // U+025C: LATIN SMALL LETTER REVERSED OPEN E + + "ɝ" // U+025D: LATIN SMALL LETTER REVERSED OPEN E WITH HOOK + + "ɞ" // U+025E: LATIN SMALL LETTER CLOSED REVERSED OPEN E + + "ʚ" // U+029A: LATIN SMALL LETTER CLOSED OPEN E + + "ᴈ" // U+1D08: LATIN SMALL LETTER TURNED OPEN E + + "ᶒ" // U+1D92: LATIN SMALL LETTER E WITH RETROFLEX HOOK + + "ᶓ" // U+1D93: LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK + + "ᶔ" // U+1D94: LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK + + "ḕ" // U+1E15: LATIN SMALL LETTER E WITH MACRON AND GRAVE + + "ḗ" // U+1E17: LATIN SMALL LETTER E WITH MACRON AND ACUTE + + "ḙ" // U+1E19: LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW + + "ḛ" // U+1E1B: LATIN SMALL LETTER E WITH TILDE BELOW + + "ḝ" // U+1E1D: LATIN SMALL LETTER E WITH CEDILLA AND BREVE + + "ẹ" // U+1EB9: LATIN SMALL LETTER E WITH DOT BELOW + + "ẻ" // U+1EBB: LATIN SMALL LETTER E WITH HOOK ABOVE + + "ẽ" // U+1EBD: LATIN SMALL LETTER E WITH TILDE + + "ế" // U+1EBF: LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE + + "ề" // U+1EC1: LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE + + "ể" // U+1EC3: LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + + "ễ" // U+1EC5: LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE + + "ệ" // U+1EC7: LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW + + "ₑ" // U+2091: LATIN SUBSCRIPT SMALL LETTER E + + "ⓔ" // U+24D4: CIRCLED LATIN SMALL LETTER E + + "ⱸ" // U+2C78: LATIN SMALL LETTER E WITH NOTCH + + "e" // U+FF45: FULLWIDTH LATIN SMALL LETTER E + ,"e", // Folded result + + "⒠" // U+24A0: PARENTHESIZED LATIN SMALL LETTER E + ,"(e)", // Folded result + + "Ƒ" // U+0191: LATIN CAPITAL LETTER F WITH HOOK + + "Ḟ" // U+1E1E: LATIN CAPITAL LETTER F WITH DOT ABOVE + + "Ⓕ" // U+24BB: CIRCLED LATIN CAPITAL LETTER F + + "ꜰ" // U+A730: LATIN LETTER SMALL CAPITAL F + 
+ "Ꝼ" // U+A77B: LATIN CAPITAL LETTER INSULAR F + + "ꟻ" // U+A7FB: LATIN EPIGRAPHIC LETTER REVERSED F + + "F" // U+FF26: FULLWIDTH LATIN CAPITAL LETTER F + ,"F", // Folded result + + "ƒ" // U+0192: LATIN SMALL LETTER F WITH HOOK + + "ᵮ" // U+1D6E: LATIN SMALL LETTER F WITH MIDDLE TILDE + + "ᶂ" // U+1D82: LATIN SMALL LETTER F WITH PALATAL HOOK + + "ḟ" // U+1E1F: LATIN SMALL LETTER F WITH DOT ABOVE + + "ẛ" // U+1E9B: LATIN SMALL LETTER LONG S WITH DOT ABOVE + + "ⓕ" // U+24D5: CIRCLED LATIN SMALL LETTER F + + "ꝼ" // U+A77C: LATIN SMALL LETTER INSULAR F + + "f" // U+FF46: FULLWIDTH LATIN SMALL LETTER F + ,"f", // Folded result + + "⒡" // U+24A1: PARENTHESIZED LATIN SMALL LETTER F + ,"(f)", // Folded result + + "ff" // U+FB00: LATIN SMALL LIGATURE FF + ,"ff", // Folded result + + "ffi" // U+FB03: LATIN SMALL LIGATURE FFI + ,"ffi", // Folded result + + "ffl" // U+FB04: LATIN SMALL LIGATURE FFL + ,"ffl", // Folded result + + "fi" // U+FB01: LATIN SMALL LIGATURE FI + ,"fi", // Folded result + + "fl" // U+FB02: LATIN SMALL LIGATURE FL + ,"fl", // Folded result + + "Ĝ" // U+011C: LATIN CAPITAL LETTER G WITH CIRCUMFLEX + + "Ğ" // U+011E: LATIN CAPITAL LETTER G WITH BREVE + + "Ġ" // U+0120: LATIN CAPITAL LETTER G WITH DOT ABOVE + + "Ģ" // U+0122: LATIN CAPITAL LETTER G WITH CEDILLA + + "Ɠ" // U+0193: LATIN CAPITAL LETTER G WITH HOOK + + "Ǥ" // U+01E4: LATIN CAPITAL LETTER G WITH STROKE + + "ǥ" // U+01E5: LATIN SMALL LETTER G WITH STROKE + + "Ǧ" // U+01E6: LATIN CAPITAL LETTER G WITH CARON + + "ǧ" // U+01E7: LATIN SMALL LETTER G WITH CARON + + "Ǵ" // U+01F4: LATIN CAPITAL LETTER G WITH ACUTE + + "ɢ" // U+0262: LATIN LETTER SMALL CAPITAL G + + "ʛ" // U+029B: LATIN LETTER SMALL CAPITAL G WITH HOOK + + "Ḡ" // U+1E20: LATIN CAPITAL LETTER G WITH MACRON + + "Ⓖ" // U+24BC: CIRCLED LATIN CAPITAL LETTER G + + "Ᵹ" // U+A77D: LATIN CAPITAL LETTER INSULAR G + + "Ꝿ" // U+A77E: LATIN CAPITAL LETTER TURNED INSULAR G + + "G" // U+FF27: FULLWIDTH LATIN CAPITAL LETTER G + ,"G", // Folded result 
+ + "ĝ" // U+011D: LATIN SMALL LETTER G WITH CIRCUMFLEX + + "ğ" // U+011F: LATIN SMALL LETTER G WITH BREVE + + "ġ" // U+0121: LATIN SMALL LETTER G WITH DOT ABOVE + + "ģ" // U+0123: LATIN SMALL LETTER G WITH CEDILLA + + "ǵ" // U+01F5: LATIN SMALL LETTER G WITH ACUTE + + "ɠ" // U+0260: LATIN SMALL LETTER G WITH HOOK + + "ɡ" // U+0261: LATIN SMALL LETTER SCRIPT G + + "ᵷ" // U+1D77: LATIN SMALL LETTER TURNED G + + "ᵹ" // U+1D79: LATIN SMALL LETTER INSULAR G + + "ᶃ" // U+1D83: LATIN SMALL LETTER G WITH PALATAL HOOK + + "ḡ" // U+1E21: LATIN SMALL LETTER G WITH MACRON + + "ⓖ" // U+24D6: CIRCLED LATIN SMALL LETTER G + + "ꝿ" // U+A77F: LATIN SMALL LETTER TURNED INSULAR G + + "g" // U+FF47: FULLWIDTH LATIN SMALL LETTER G + ,"g", // Folded result + + "⒢" // U+24A2: PARENTHESIZED LATIN SMALL LETTER G + ,"(g)", // Folded result + + "Ĥ" // U+0124: LATIN CAPITAL LETTER H WITH CIRCUMFLEX + + "Ħ" // U+0126: LATIN CAPITAL LETTER H WITH STROKE + + "Ȟ" // U+021E: LATIN CAPITAL LETTER H WITH CARON + + "ʜ" // U+029C: LATIN LETTER SMALL CAPITAL H + + "Ḣ" // U+1E22: LATIN CAPITAL LETTER H WITH DOT ABOVE + + "Ḥ" // U+1E24: LATIN CAPITAL LETTER H WITH DOT BELOW + + "Ḧ" // U+1E26: LATIN CAPITAL LETTER H WITH DIAERESIS + + "Ḩ" // U+1E28: LATIN CAPITAL LETTER H WITH CEDILLA + + "Ḫ" // U+1E2A: LATIN CAPITAL LETTER H WITH BREVE BELOW + + "Ⓗ" // U+24BD: CIRCLED LATIN CAPITAL LETTER H + + "Ⱨ" // U+2C67: LATIN CAPITAL LETTER H WITH DESCENDER + + "Ⱶ" // U+2C75: LATIN CAPITAL LETTER HALF H + + "H" // U+FF28: FULLWIDTH LATIN CAPITAL LETTER H + ,"H", // Folded result + + "ĥ" // U+0125: LATIN SMALL LETTER H WITH CIRCUMFLEX + + "ħ" // U+0127: LATIN SMALL LETTER H WITH STROKE + + "ȟ" // U+021F: LATIN SMALL LETTER H WITH CARON + + "ɥ" // U+0265: LATIN SMALL LETTER TURNED H + + "ɦ" // U+0266: LATIN SMALL LETTER H WITH HOOK + + "ʮ" // U+02AE: LATIN SMALL LETTER TURNED H WITH FISHHOOK + + "ʯ" // U+02AF: LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL + + "ḣ" // U+1E23: LATIN SMALL LETTER H WITH DOT ABOVE + 
+ "ḥ" // U+1E25: LATIN SMALL LETTER H WITH DOT BELOW + + "ḧ" // U+1E27: LATIN SMALL LETTER H WITH DIAERESIS + + "ḩ" // U+1E29: LATIN SMALL LETTER H WITH CEDILLA + + "ḫ" // U+1E2B: LATIN SMALL LETTER H WITH BREVE BELOW + + "ẖ" // U+1E96: LATIN SMALL LETTER H WITH LINE BELOW + + "ⓗ" // U+24D7: CIRCLED LATIN SMALL LETTER H + + "ⱨ" // U+2C68: LATIN SMALL LETTER H WITH DESCENDER + + "ⱶ" // U+2C76: LATIN SMALL LETTER HALF H + + "h" // U+FF48: FULLWIDTH LATIN SMALL LETTER H + ,"h", // Folded result + + "Ƕ" // U+01F6: LATIN CAPITAL LETTER HWAIR + ,"HV", // Folded result + + "⒣" // U+24A3: PARENTHESIZED LATIN SMALL LETTER H + ,"(h)", // Folded result + + "ƕ" // U+0195: LATIN SMALL LETTER HV + ,"hv", // Folded result + + "Ì" // U+00CC: LATIN CAPITAL LETTER I WITH GRAVE + + "Í" // U+00CD: LATIN CAPITAL LETTER I WITH ACUTE + + "Î" // U+00CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX + + "Ï" // U+00CF: LATIN CAPITAL LETTER I WITH DIAERESIS + + "Ĩ" // U+0128: LATIN CAPITAL LETTER I WITH TILDE + + "Ī" // U+012A: LATIN CAPITAL LETTER I WITH MACRON + + "Ĭ" // U+012C: LATIN CAPITAL LETTER I WITH BREVE + + "Į" // U+012E: LATIN CAPITAL LETTER I WITH OGONEK + + "İ" // U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE + + "Ɩ" // U+0196: LATIN CAPITAL LETTER IOTA + + "Ɨ" // U+0197: LATIN CAPITAL LETTER I WITH STROKE + + "Ǐ" // U+01CF: LATIN CAPITAL LETTER I WITH CARON + + "Ȉ" // U+0208: LATIN CAPITAL LETTER I WITH DOUBLE GRAVE + + "Ȋ" // U+020A: LATIN CAPITAL LETTER I WITH INVERTED BREVE + + "ɪ" // U+026A: LATIN LETTER SMALL CAPITAL I + + "ᵻ" // U+1D7B: LATIN SMALL CAPITAL LETTER I WITH STROKE + + "Ḭ" // U+1E2C: LATIN CAPITAL LETTER I WITH TILDE BELOW + + "Ḯ" // U+1E2E: LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + + "Ỉ" // U+1EC8: LATIN CAPITAL LETTER I WITH HOOK ABOVE + + "Ị" // U+1ECA: LATIN CAPITAL LETTER I WITH DOT BELOW + + "Ⓘ" // U+24BE: CIRCLED LATIN CAPITAL LETTER I + + "ꟾ" // U+A7FE: LATIN EPIGRAPHIC LETTER I LONGA + + "I" // U+FF29: FULLWIDTH LATIN CAPITAL LETTER I + ,"I", // 
Folded result + + "ì" // U+00EC: LATIN SMALL LETTER I WITH GRAVE + + "í" // U+00ED: LATIN SMALL LETTER I WITH ACUTE + + "î" // U+00EE: LATIN SMALL LETTER I WITH CIRCUMFLEX + + "ï" // U+00EF: LATIN SMALL LETTER I WITH DIAERESIS + + "ĩ" // U+0129: LATIN SMALL LETTER I WITH TILDE + + "ī" // U+012B: LATIN SMALL LETTER I WITH MACRON + + "ĭ" // U+012D: LATIN SMALL LETTER I WITH BREVE + + "į" // U+012F: LATIN SMALL LETTER I WITH OGONEK + + "ı" // U+0131: LATIN SMALL LETTER DOTLESS I + + "ǐ" // U+01D0: LATIN SMALL LETTER I WITH CARON + + "ȉ" // U+0209: LATIN SMALL LETTER I WITH DOUBLE GRAVE + + "ȋ" // U+020B: LATIN SMALL LETTER I WITH INVERTED BREVE + + "ɨ" // U+0268: LATIN SMALL LETTER I WITH STROKE + + "ᴉ" // U+1D09: LATIN SMALL LETTER TURNED I + + "ᵢ" // U+1D62: LATIN SUBSCRIPT SMALL LETTER I + + "ᵼ" // U+1D7C: LATIN SMALL LETTER IOTA WITH STROKE + + "ᶖ" // U+1D96: LATIN SMALL LETTER I WITH RETROFLEX HOOK + + "ḭ" // U+1E2D: LATIN SMALL LETTER I WITH TILDE BELOW + + "ḯ" // U+1E2F: LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE + + "ỉ" // U+1EC9: LATIN SMALL LETTER I WITH HOOK ABOVE + + "ị" // U+1ECB: LATIN SMALL LETTER I WITH DOT BELOW + + "ⁱ" // U+2071: SUPERSCRIPT LATIN SMALL LETTER I + + "ⓘ" // U+24D8: CIRCLED LATIN SMALL LETTER I + + "i" // U+FF49: FULLWIDTH LATIN SMALL LETTER I + ,"i", // Folded result + + "IJ" // U+0132: LATIN CAPITAL LIGATURE IJ + ,"IJ", // Folded result + + "⒤" // U+24A4: PARENTHESIZED LATIN SMALL LETTER I + ,"(i)", // Folded result + + "ij" // U+0133: LATIN SMALL LIGATURE IJ + ,"ij", // Folded result + + "Ĵ" // U+0134: LATIN CAPITAL LETTER J WITH CIRCUMFLEX + + "Ɉ" // U+0248: LATIN CAPITAL LETTER J WITH STROKE + + "ᴊ" // U+1D0A: LATIN LETTER SMALL CAPITAL J + + "Ⓙ" // U+24BF: CIRCLED LATIN CAPITAL LETTER J + + "J" // U+FF2A: FULLWIDTH LATIN CAPITAL LETTER J + ,"J", // Folded result + + "ĵ" // U+0135: LATIN SMALL LETTER J WITH CIRCUMFLEX + + "ǰ" // U+01F0: LATIN SMALL LETTER J WITH CARON + + "ȷ" // U+0237: LATIN SMALL LETTER DOTLESS J + + "ɉ" // 
U+0249: LATIN SMALL LETTER J WITH STROKE + + "ɟ" // U+025F: LATIN SMALL LETTER DOTLESS J WITH STROKE + + "ʄ" // U+0284: LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK + + "ʝ" // U+029D: LATIN SMALL LETTER J WITH CROSSED-TAIL + + "ⓙ" // U+24D9: CIRCLED LATIN SMALL LETTER J + + "ⱼ" // U+2C7C: LATIN SUBSCRIPT SMALL LETTER J + + "j" // U+FF4A: FULLWIDTH LATIN SMALL LETTER J + ,"j", // Folded result + + "⒥" // U+24A5: PARENTHESIZED LATIN SMALL LETTER J + ,"(j)", // Folded result + + "Ķ" // U+0136: LATIN CAPITAL LETTER K WITH CEDILLA + + "Ƙ" // U+0198: LATIN CAPITAL LETTER K WITH HOOK + + "Ǩ" // U+01E8: LATIN CAPITAL LETTER K WITH CARON + + "ᴋ" // U+1D0B: LATIN LETTER SMALL CAPITAL K + + "Ḱ" // U+1E30: LATIN CAPITAL LETTER K WITH ACUTE + + "Ḳ" // U+1E32: LATIN CAPITAL LETTER K WITH DOT BELOW + + "Ḵ" // U+1E34: LATIN CAPITAL LETTER K WITH LINE BELOW + + "Ⓚ" // U+24C0: CIRCLED LATIN CAPITAL LETTER K + + "Ⱪ" // U+2C69: LATIN CAPITAL LETTER K WITH DESCENDER + + "Ꝁ" // U+A740: LATIN CAPITAL LETTER K WITH STROKE + + "Ꝃ" // U+A742: LATIN CAPITAL LETTER K WITH DIAGONAL STROKE + + "Ꝅ" // U+A744: LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE + + "K" // U+FF2B: FULLWIDTH LATIN CAPITAL LETTER K + ,"K", // Folded result + + "ķ" // U+0137: LATIN SMALL LETTER K WITH CEDILLA + + "ƙ" // U+0199: LATIN SMALL LETTER K WITH HOOK + + "ǩ" // U+01E9: LATIN SMALL LETTER K WITH CARON + + "ʞ" // U+029E: LATIN SMALL LETTER TURNED K + + "ᶄ" // U+1D84: LATIN SMALL LETTER K WITH PALATAL HOOK + + "ḱ" // U+1E31: LATIN SMALL LETTER K WITH ACUTE + + "ḳ" // U+1E33: LATIN SMALL LETTER K WITH DOT BELOW + + "ḵ" // U+1E35: LATIN SMALL LETTER K WITH LINE BELOW + + "ⓚ" // U+24DA: CIRCLED LATIN SMALL LETTER K + + "ⱪ" // U+2C6A: LATIN SMALL LETTER K WITH DESCENDER + + "ꝁ" // U+A741: LATIN SMALL LETTER K WITH STROKE + + "ꝃ" // U+A743: LATIN SMALL LETTER K WITH DIAGONAL STROKE + + "ꝅ" // U+A745: LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE + + "k" // U+FF4B: FULLWIDTH LATIN SMALL LETTER K + ,"k", 
// Folded result + + "⒦" // U+24A6: PARENTHESIZED LATIN SMALL LETTER K + ,"(k)", // Folded result + + "Ĺ" // U+0139: LATIN CAPITAL LETTER L WITH ACUTE + + "Ļ" // U+013B: LATIN CAPITAL LETTER L WITH CEDILLA + + "Ľ" // U+013D: LATIN CAPITAL LETTER L WITH CARON + + "Ŀ" // U+013F: LATIN CAPITAL LETTER L WITH MIDDLE DOT + + "Ł" // U+0141: LATIN CAPITAL LETTER L WITH STROKE + + "Ƚ" // U+023D: LATIN CAPITAL LETTER L WITH BAR + + "ʟ" // U+029F: LATIN LETTER SMALL CAPITAL L + + "ᴌ" // U+1D0C: LATIN LETTER SMALL CAPITAL L WITH STROKE + + "Ḷ" // U+1E36: LATIN CAPITAL LETTER L WITH DOT BELOW + + "Ḹ" // U+1E38: LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + + "Ḻ" // U+1E3A: LATIN CAPITAL LETTER L WITH LINE BELOW + + "Ḽ" // U+1E3C: LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + + "Ⓛ" // U+24C1: CIRCLED LATIN CAPITAL LETTER L + + "Ⱡ" // U+2C60: LATIN CAPITAL LETTER L WITH DOUBLE BAR + + "Ɫ" // U+2C62: LATIN CAPITAL LETTER L WITH MIDDLE TILDE + + "Ꝇ" // U+A746: LATIN CAPITAL LETTER BROKEN L + + "Ꝉ" // U+A748: LATIN CAPITAL LETTER L WITH HIGH STROKE + + "Ꞁ" // U+A780: LATIN CAPITAL LETTER TURNED L + + "L" // U+FF2C: FULLWIDTH LATIN CAPITAL LETTER L + ,"L", // Folded result + + "ĺ" // U+013A: LATIN SMALL LETTER L WITH ACUTE + + "ļ" // U+013C: LATIN SMALL LETTER L WITH CEDILLA + + "ľ" // U+013E: LATIN SMALL LETTER L WITH CARON + + "ŀ" // U+0140: LATIN SMALL LETTER L WITH MIDDLE DOT + + "ł" // U+0142: LATIN SMALL LETTER L WITH STROKE + + "ƚ" // U+019A: LATIN SMALL LETTER L WITH BAR + + "ȴ" // U+0234: LATIN SMALL LETTER L WITH CURL + + "ɫ" // U+026B: LATIN SMALL LETTER L WITH MIDDLE TILDE + + "ɬ" // U+026C: LATIN SMALL LETTER L WITH BELT + + "ɭ" // U+026D: LATIN SMALL LETTER L WITH RETROFLEX HOOK + + "ᶅ" // U+1D85: LATIN SMALL LETTER L WITH PALATAL HOOK + + "ḷ" // U+1E37: LATIN SMALL LETTER L WITH DOT BELOW + + "ḹ" // U+1E39: LATIN SMALL LETTER L WITH DOT BELOW AND MACRON + + "ḻ" // U+1E3B: LATIN SMALL LETTER L WITH LINE BELOW + + "ḽ" // U+1E3D: LATIN SMALL LETTER L WITH CIRCUMFLEX 
BELOW + + "ⓛ" // U+24DB: CIRCLED LATIN SMALL LETTER L + + "ⱡ" // U+2C61: LATIN SMALL LETTER L WITH DOUBLE BAR + + "ꝇ" // U+A747: LATIN SMALL LETTER BROKEN L + + "ꝉ" // U+A749: LATIN SMALL LETTER L WITH HIGH STROKE + + "ꞁ" // U+A781: LATIN SMALL LETTER TURNED L + + "l" // U+FF4C: FULLWIDTH LATIN SMALL LETTER L + ,"l", // Folded result + + "LJ" // U+01C7: LATIN CAPITAL LETTER LJ + ,"LJ", // Folded result + + "Ỻ" // U+1EFA: LATIN CAPITAL LETTER MIDDLE-WELSH LL + ,"LL", // Folded result + + "Lj" // U+01C8: LATIN CAPITAL LETTER L WITH SMALL LETTER J + ,"Lj", // Folded result + + "⒧" // U+24A7: PARENTHESIZED LATIN SMALL LETTER L + ,"(l)", // Folded result + + "lj" // U+01C9: LATIN SMALL LETTER LJ + ,"lj", // Folded result + + "ỻ" // U+1EFB: LATIN SMALL LETTER MIDDLE-WELSH LL + ,"ll", // Folded result + + "ʪ" // U+02AA: LATIN SMALL LETTER LS DIGRAPH + ,"ls", // Folded result + + "ʫ" // U+02AB: LATIN SMALL LETTER LZ DIGRAPH + ,"lz", // Folded result + + "Ɯ" // U+019C: LATIN CAPITAL LETTER TURNED M + + "ᴍ" // U+1D0D: LATIN LETTER SMALL CAPITAL M + + "Ḿ" // U+1E3E: LATIN CAPITAL LETTER M WITH ACUTE + + "Ṁ" // U+1E40: LATIN CAPITAL LETTER M WITH DOT ABOVE + + "Ṃ" // U+1E42: LATIN CAPITAL LETTER M WITH DOT BELOW + + "Ⓜ" // U+24C2: CIRCLED LATIN CAPITAL LETTER M + + "Ɱ" // U+2C6E: LATIN CAPITAL LETTER M WITH HOOK + + "ꟽ" // U+A7FD: LATIN EPIGRAPHIC LETTER INVERTED M + + "ꟿ" // U+A7FF: LATIN EPIGRAPHIC LETTER ARCHAIC M + + "M" // U+FF2D: FULLWIDTH LATIN CAPITAL LETTER M + ,"M", // Folded result + + "ɯ" // U+026F: LATIN SMALL LETTER TURNED M + + "ɰ" // U+0270: LATIN SMALL LETTER TURNED M WITH LONG LEG + + "ɱ" // U+0271: LATIN SMALL LETTER M WITH HOOK + + "ᵯ" // U+1D6F: LATIN SMALL LETTER M WITH MIDDLE TILDE + + "ᶆ" // U+1D86: LATIN SMALL LETTER M WITH PALATAL HOOK + + "ḿ" // U+1E3F: LATIN SMALL LETTER M WITH ACUTE + + "ṁ" // U+1E41: LATIN SMALL LETTER M WITH DOT ABOVE + + "ṃ" // U+1E43: LATIN SMALL LETTER M WITH DOT BELOW + + "ⓜ" // U+24DC: CIRCLED LATIN SMALL LETTER M + + "m" // 
U+FF4D: FULLWIDTH LATIN SMALL LETTER M + ,"m", // Folded result + + "⒨" // U+24A8: PARENTHESIZED LATIN SMALL LETTER M + ,"(m)", // Folded result + + "Ñ" // U+00D1: LATIN CAPITAL LETTER N WITH TILDE + + "Ń" // U+0143: LATIN CAPITAL LETTER N WITH ACUTE + + "Ņ" // U+0145: LATIN CAPITAL LETTER N WITH CEDILLA + + "Ň" // U+0147: LATIN CAPITAL LETTER N WITH CARON + + "Ŋ" // U+014A: LATIN CAPITAL LETTER ENG + + "Ɲ" // U+019D: LATIN CAPITAL LETTER N WITH LEFT HOOK + + "Ǹ" // U+01F8: LATIN CAPITAL LETTER N WITH GRAVE + + "Ƞ" // U+0220: LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + + "ɴ" // U+0274: LATIN LETTER SMALL CAPITAL N + + "ᴎ" // U+1D0E: LATIN LETTER SMALL CAPITAL REVERSED N + + "Ṅ" // U+1E44: LATIN CAPITAL LETTER N WITH DOT ABOVE + + "Ṇ" // U+1E46: LATIN CAPITAL LETTER N WITH DOT BELOW + + "Ṉ" // U+1E48: LATIN CAPITAL LETTER N WITH LINE BELOW + + "Ṋ" // U+1E4A: LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + + "Ⓝ" // U+24C3: CIRCLED LATIN CAPITAL LETTER N + + "N" // U+FF2E: FULLWIDTH LATIN CAPITAL LETTER N + ,"N", // Folded result + + "ñ" // U+00F1: LATIN SMALL LETTER N WITH TILDE + + "ń" // U+0144: LATIN SMALL LETTER N WITH ACUTE + + "ņ" // U+0146: LATIN SMALL LETTER N WITH CEDILLA + + "ň" // U+0148: LATIN SMALL LETTER N WITH CARON + + "ʼn" // U+0149: LATIN SMALL LETTER N PRECEDED BY APOSTROPHE + + "ŋ" // U+014B: LATIN SMALL LETTER ENG + + "ƞ" // U+019E: LATIN SMALL LETTER N WITH LONG RIGHT LEG + + "ǹ" // U+01F9: LATIN SMALL LETTER N WITH GRAVE + + "ȵ" // U+0235: LATIN SMALL LETTER N WITH CURL + + "ɲ" // U+0272: LATIN SMALL LETTER N WITH LEFT HOOK + + "ɳ" // U+0273: LATIN SMALL LETTER N WITH RETROFLEX HOOK + + "ᵰ" // U+1D70: LATIN SMALL LETTER N WITH MIDDLE TILDE + + "ᶇ" // U+1D87: LATIN SMALL LETTER N WITH PALATAL HOOK + + "ṅ" // U+1E45: LATIN SMALL LETTER N WITH DOT ABOVE + + "ṇ" // U+1E47: LATIN SMALL LETTER N WITH DOT BELOW + + "ṉ" // U+1E49: LATIN SMALL LETTER N WITH LINE BELOW + + "ṋ" // U+1E4B: LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW + + "ⁿ" // U+207F: 
SUPERSCRIPT LATIN SMALL LETTER N + + "ⓝ" // U+24DD: CIRCLED LATIN SMALL LETTER N + + "n" // U+FF4E: FULLWIDTH LATIN SMALL LETTER N + ,"n", // Folded result + + "NJ" // U+01CA: LATIN CAPITAL LETTER NJ + ,"NJ", // Folded result + + "Nj" // U+01CB: LATIN CAPITAL LETTER N WITH SMALL LETTER J + ,"Nj", // Folded result + + "⒩" // U+24A9: PARENTHESIZED LATIN SMALL LETTER N + ,"(n)", // Folded result + + "nj" // U+01CC: LATIN SMALL LETTER NJ + ,"nj", // Folded result + + "Ò" // U+00D2: LATIN CAPITAL LETTER O WITH GRAVE + + "Ó" // U+00D3: LATIN CAPITAL LETTER O WITH ACUTE + + "Ô" // U+00D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX + + "Õ" // U+00D5: LATIN CAPITAL LETTER O WITH TILDE + + "Ö" // U+00D6: LATIN CAPITAL LETTER O WITH DIAERESIS + + "Ø" // U+00D8: LATIN CAPITAL LETTER O WITH STROKE + + "Ō" // U+014C: LATIN CAPITAL LETTER O WITH MACRON + + "Ŏ" // U+014E: LATIN CAPITAL LETTER O WITH BREVE + + "Ő" // U+0150: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + + "Ɔ" // U+0186: LATIN CAPITAL LETTER OPEN O + + "Ɵ" // U+019F: LATIN CAPITAL LETTER O WITH MIDDLE TILDE + + "Ơ" // U+01A0: LATIN CAPITAL LETTER O WITH HORN + + "Ǒ" // U+01D1: LATIN CAPITAL LETTER O WITH CARON + + "Ǫ" // U+01EA: LATIN CAPITAL LETTER O WITH OGONEK + + "Ǭ" // U+01EC: LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + + "Ǿ" // U+01FE: LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + + "Ȍ" // U+020C: LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + + "Ȏ" // U+020E: LATIN CAPITAL LETTER O WITH INVERTED BREVE + + "Ȫ" // U+022A: LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + + "Ȭ" // U+022C: LATIN CAPITAL LETTER O WITH TILDE AND MACRON + + "Ȯ" // U+022E: LATIN CAPITAL LETTER O WITH DOT ABOVE + + "Ȱ" // U+0230: LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + + "ᴏ" // U+1D0F: LATIN LETTER SMALL CAPITAL O + + "ᴐ" // U+1D10: LATIN LETTER SMALL CAPITAL OPEN O + + "Ṍ" // U+1E4C: LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + + "Ṏ" // U+1E4E: LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + + "Ṑ" // U+1E50: LATIN CAPITAL 
LETTER O WITH MACRON AND GRAVE + + "Ṓ" // U+1E52: LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + + "Ọ" // U+1ECC: LATIN CAPITAL LETTER O WITH DOT BELOW + + "Ỏ" // U+1ECE: LATIN CAPITAL LETTER O WITH HOOK ABOVE + + "Ố" // U+1ED0: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + + "Ồ" // U+1ED2: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + + "Ổ" // U+1ED4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + + "Ỗ" // U+1ED6: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + + "Ộ" // U+1ED8: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + + "Ớ" // U+1EDA: LATIN CAPITAL LETTER O WITH HORN AND ACUTE + + "Ờ" // U+1EDC: LATIN CAPITAL LETTER O WITH HORN AND GRAVE + + "Ở" // U+1EDE: LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + + "Ỡ" // U+1EE0: LATIN CAPITAL LETTER O WITH HORN AND TILDE + + "Ợ" // U+1EE2: LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + + "Ⓞ" // U+24C4: CIRCLED LATIN CAPITAL LETTER O + + "Ꝋ" // U+A74A: LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + + "Ꝍ" // U+A74C: LATIN CAPITAL LETTER O WITH LOOP + + "O" // U+FF2F: FULLWIDTH LATIN CAPITAL LETTER O + ,"O", // Folded result + + "ò" // U+00F2: LATIN SMALL LETTER O WITH GRAVE + + "ó" // U+00F3: LATIN SMALL LETTER O WITH ACUTE + + "ô" // U+00F4: LATIN SMALL LETTER O WITH CIRCUMFLEX + + "õ" // U+00F5: LATIN SMALL LETTER O WITH TILDE + + "ö" // U+00F6: LATIN SMALL LETTER O WITH DIAERESIS + + "ø" // U+00F8: LATIN SMALL LETTER O WITH STROKE + + "ō" // U+014D: LATIN SMALL LETTER O WITH MACRON + + "ŏ" // U+014F: LATIN SMALL LETTER O WITH BREVE + + "ő" // U+0151: LATIN SMALL LETTER O WITH DOUBLE ACUTE + + "ơ" // U+01A1: LATIN SMALL LETTER O WITH HORN + + "ǒ" // U+01D2: LATIN SMALL LETTER O WITH CARON + + "ǫ" // U+01EB: LATIN SMALL LETTER O WITH OGONEK + + "ǭ" // U+01ED: LATIN SMALL LETTER O WITH OGONEK AND MACRON + + "ǿ" // U+01FF: LATIN SMALL LETTER O WITH STROKE AND ACUTE + + "ȍ" // U+020D: LATIN SMALL LETTER O WITH DOUBLE GRAVE + + "ȏ" // U+020F: LATIN SMALL LETTER O WITH INVERTED BREVE + 
+ "ȫ" // U+022B: LATIN SMALL LETTER O WITH DIAERESIS AND MACRON + + "ȭ" // U+022D: LATIN SMALL LETTER O WITH TILDE AND MACRON + + "ȯ" // U+022F: LATIN SMALL LETTER O WITH DOT ABOVE + + "ȱ" // U+0231: LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON + + "ɔ" // U+0254: LATIN SMALL LETTER OPEN O + + "ɵ" // U+0275: LATIN SMALL LETTER BARRED O + + "ᴖ" // U+1D16: LATIN SMALL LETTER TOP HALF O + + "ᴗ" // U+1D17: LATIN SMALL LETTER BOTTOM HALF O + + "ᶗ" // U+1D97: LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK + + "ṍ" // U+1E4D: LATIN SMALL LETTER O WITH TILDE AND ACUTE + + "ṏ" // U+1E4F: LATIN SMALL LETTER O WITH TILDE AND DIAERESIS + + "ṑ" // U+1E51: LATIN SMALL LETTER O WITH MACRON AND GRAVE + + "ṓ" // U+1E53: LATIN SMALL LETTER O WITH MACRON AND ACUTE + + "ọ" // U+1ECD: LATIN SMALL LETTER O WITH DOT BELOW + + "ỏ" // U+1ECF: LATIN SMALL LETTER O WITH HOOK ABOVE + + "ố" // U+1ED1: LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE + + "ồ" // U+1ED3: LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE + + "ổ" // U+1ED5: LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + + "ỗ" // U+1ED7: LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE + + "ộ" // U+1ED9: LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW + + "ớ" // U+1EDB: LATIN SMALL LETTER O WITH HORN AND ACUTE + + "ờ" // U+1EDD: LATIN SMALL LETTER O WITH HORN AND GRAVE + + "ở" // U+1EDF: LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE + + "ỡ" // U+1EE1: LATIN SMALL LETTER O WITH HORN AND TILDE + + "ợ" // U+1EE3: LATIN SMALL LETTER O WITH HORN AND DOT BELOW + + "ₒ" // U+2092: LATIN SUBSCRIPT SMALL LETTER O + + "ⓞ" // U+24DE: CIRCLED LATIN SMALL LETTER O + + "ⱺ" // U+2C7A: LATIN SMALL LETTER O WITH LOW RING INSIDE + + "ꝋ" // U+A74B: LATIN SMALL LETTER O WITH LONG STROKE OVERLAY + + "ꝍ" // U+A74D: LATIN SMALL LETTER O WITH LOOP + + "o" // U+FF4F: FULLWIDTH LATIN SMALL LETTER O + ,"o", // Folded result + + "Œ" // U+0152: LATIN CAPITAL LIGATURE OE + + "ɶ" // U+0276: LATIN LETTER SMALL CAPITAL OE + ,"OE", // Folded result + + "Ꝏ" // 
U+A74E: LATIN CAPITAL LETTER OO + ,"OO", // Folded result + + "Ȣ" // U+0222: LATIN CAPITAL LETTER OU + + "ᴕ" // U+1D15: LATIN LETTER SMALL CAPITAL OU + ,"OU", // Folded result + + "⒪" // U+24AA: PARENTHESIZED LATIN SMALL LETTER O + ,"(o)", // Folded result + + "œ" // U+0153: LATIN SMALL LIGATURE OE + + "ᴔ" // U+1D14: LATIN SMALL LETTER TURNED OE + ,"oe", // Folded result + + "ꝏ" // U+A74F: LATIN SMALL LETTER OO + ,"oo", // Folded result + + "ȣ" // U+0223: LATIN SMALL LETTER OU + ,"ou", // Folded result + + "Ƥ" // U+01A4: LATIN CAPITAL LETTER P WITH HOOK + + "ᴘ" // U+1D18: LATIN LETTER SMALL CAPITAL P + + "Ṕ" // U+1E54: LATIN CAPITAL LETTER P WITH ACUTE + + "Ṗ" // U+1E56: LATIN CAPITAL LETTER P WITH DOT ABOVE + + "Ⓟ" // U+24C5: CIRCLED LATIN CAPITAL LETTER P + + "Ᵽ" // U+2C63: LATIN CAPITAL LETTER P WITH STROKE + + "Ꝑ" // U+A750: LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + + "Ꝓ" // U+A752: LATIN CAPITAL LETTER P WITH FLOURISH + + "Ꝕ" // U+A754: LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + + "P" // U+FF30: FULLWIDTH LATIN CAPITAL LETTER P + ,"P", // Folded result + + "ƥ" // U+01A5: LATIN SMALL LETTER P WITH HOOK + + "ᵱ" // U+1D71: LATIN SMALL LETTER P WITH MIDDLE TILDE + + "ᵽ" // U+1D7D: LATIN SMALL LETTER P WITH STROKE + + "ᶈ" // U+1D88: LATIN SMALL LETTER P WITH PALATAL HOOK + + "ṕ" // U+1E55: LATIN SMALL LETTER P WITH ACUTE + + "ṗ" // U+1E57: LATIN SMALL LETTER P WITH DOT ABOVE + + "ⓟ" // U+24DF: CIRCLED LATIN SMALL LETTER P + + "ꝑ" // U+A751: LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER + + "ꝓ" // U+A753: LATIN SMALL LETTER P WITH FLOURISH + + "ꝕ" // U+A755: LATIN SMALL LETTER P WITH SQUIRREL TAIL + + "ꟼ" // U+A7FC: LATIN EPIGRAPHIC LETTER REVERSED P + + "p" // U+FF50: FULLWIDTH LATIN SMALL LETTER P + ,"p", // Folded result + + "⒫" // U+24AB: PARENTHESIZED LATIN SMALL LETTER P + ,"(p)", // Folded result + + "Ɋ" // U+024A: LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + + "Ⓠ" // U+24C6: CIRCLED LATIN CAPITAL LETTER Q + + "Ꝗ" // U+A756: LATIN CAPITAL 
LETTER Q WITH STROKE THROUGH DESCENDER + + "Ꝙ" // U+A758: LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + + "Q" // U+FF31: FULLWIDTH LATIN CAPITAL LETTER Q + ,"Q", // Folded result + + "ĸ" // U+0138: LATIN SMALL LETTER KRA + + "ɋ" // U+024B: LATIN SMALL LETTER Q WITH HOOK TAIL + + "ʠ" // U+02A0: LATIN SMALL LETTER Q WITH HOOK + + "ⓠ" // U+24E0: CIRCLED LATIN SMALL LETTER Q + + "ꝗ" // U+A757: LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER + + "ꝙ" // U+A759: LATIN SMALL LETTER Q WITH DIAGONAL STROKE + + "q" // U+FF51: FULLWIDTH LATIN SMALL LETTER Q + ,"q", // Folded result + + "⒬" // U+24AC: PARENTHESIZED LATIN SMALL LETTER Q + ,"(q)", // Folded result + + "ȹ" // U+0239: LATIN SMALL LETTER QP DIGRAPH + ,"qp", // Folded result + + "Ŕ" // U+0154: LATIN CAPITAL LETTER R WITH ACUTE + + "Ŗ" // U+0156: LATIN CAPITAL LETTER R WITH CEDILLA + + "Ř" // U+0158: LATIN CAPITAL LETTER R WITH CARON + + "Ȑ" // U+0210: LATIN CAPITAL LETTER R WITH DOUBLE GRAVE + + "Ȓ" // U+0212: LATIN CAPITAL LETTER R WITH INVERTED BREVE + + "Ɍ" // U+024C: LATIN CAPITAL LETTER R WITH STROKE + + "ʀ" // U+0280: LATIN LETTER SMALL CAPITAL R + + "ʁ" // U+0281: LATIN LETTER SMALL CAPITAL INVERTED R + + "ᴙ" // U+1D19: LATIN LETTER SMALL CAPITAL REVERSED R + + "ᴚ" // U+1D1A: LATIN LETTER SMALL CAPITAL TURNED R + + "Ṙ" // U+1E58: LATIN CAPITAL LETTER R WITH DOT ABOVE + + "Ṛ" // U+1E5A: LATIN CAPITAL LETTER R WITH DOT BELOW + + "Ṝ" // U+1E5C: LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + + "Ṟ" // U+1E5E: LATIN CAPITAL LETTER R WITH LINE BELOW + + "Ⓡ" // U+24C7: CIRCLED LATIN CAPITAL LETTER R + + "Ɽ" // U+2C64: LATIN CAPITAL LETTER R WITH TAIL + + "Ꝛ" // U+A75A: LATIN CAPITAL LETTER R ROTUNDA + + "Ꞃ" // U+A782: LATIN CAPITAL LETTER INSULAR R + + "R" // U+FF32: FULLWIDTH LATIN CAPITAL LETTER R + ,"R", // Folded result + + "ŕ" // U+0155: LATIN SMALL LETTER R WITH ACUTE + + "ŗ" // U+0157: LATIN SMALL LETTER R WITH CEDILLA + + "ř" // U+0159: LATIN SMALL LETTER R WITH CARON + + "ȑ" // U+0211: LATIN SMALL 
LETTER R WITH DOUBLE GRAVE + + "ȓ" // U+0213: LATIN SMALL LETTER R WITH INVERTED BREVE + + "ɍ" // U+024D: LATIN SMALL LETTER R WITH STROKE + + "ɼ" // U+027C: LATIN SMALL LETTER R WITH LONG LEG + + "ɽ" // U+027D: LATIN SMALL LETTER R WITH TAIL + + "ɾ" // U+027E: LATIN SMALL LETTER R WITH FISHHOOK + + "ɿ" // U+027F: LATIN SMALL LETTER REVERSED R WITH FISHHOOK + + "ᵣ" // U+1D63: LATIN SUBSCRIPT SMALL LETTER R + + "ᵲ" // U+1D72: LATIN SMALL LETTER R WITH MIDDLE TILDE + + "ᵳ" // U+1D73: LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE + + "ᶉ" // U+1D89: LATIN SMALL LETTER R WITH PALATAL HOOK + + "ṙ" // U+1E59: LATIN SMALL LETTER R WITH DOT ABOVE + + "ṛ" // U+1E5B: LATIN SMALL LETTER R WITH DOT BELOW + + "ṝ" // U+1E5D: LATIN SMALL LETTER R WITH DOT BELOW AND MACRON + + "ṟ" // U+1E5F: LATIN SMALL LETTER R WITH LINE BELOW + + "ⓡ" // U+24E1: CIRCLED LATIN SMALL LETTER R + + "ꝛ" // U+A75B: LATIN SMALL LETTER R ROTUNDA + + "ꞃ" // U+A783: LATIN SMALL LETTER INSULAR R + + "r" // U+FF52: FULLWIDTH LATIN SMALL LETTER R + ,"r", // Folded result + + "⒭" // U+24AD: PARENTHESIZED LATIN SMALL LETTER R + ,"(r)", // Folded result + + "Ś" // U+015A: LATIN CAPITAL LETTER S WITH ACUTE + + "Ŝ" // U+015C: LATIN CAPITAL LETTER S WITH CIRCUMFLEX + + "Ş" // U+015E: LATIN CAPITAL LETTER S WITH CEDILLA + + "Š" // U+0160: LATIN CAPITAL LETTER S WITH CARON + + "Ș" // U+0218: LATIN CAPITAL LETTER S WITH COMMA BELOW + + "Ṡ" // U+1E60: LATIN CAPITAL LETTER S WITH DOT ABOVE + + "Ṣ" // U+1E62: LATIN CAPITAL LETTER S WITH DOT BELOW + + "Ṥ" // U+1E64: LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + + "Ṧ" // U+1E66: LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + + "Ṩ" // U+1E68: LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + + "Ⓢ" // U+24C8: CIRCLED LATIN CAPITAL LETTER S + + "ꜱ" // U+A731: LATIN LETTER SMALL CAPITAL S + + "ꞅ" // U+A785: LATIN SMALL LETTER INSULAR S + + "S" // U+FF33: FULLWIDTH LATIN CAPITAL LETTER S + ,"S", // Folded result + + "ś" // U+015B: LATIN SMALL LETTER S WITH ACUTE 
+ + "ŝ" // U+015D: LATIN SMALL LETTER S WITH CIRCUMFLEX + + "ş" // U+015F: LATIN SMALL LETTER S WITH CEDILLA + + "š" // U+0161: LATIN SMALL LETTER S WITH CARON + + "ſ" // U+017F: LATIN SMALL LETTER LONG S + + "ș" // U+0219: LATIN SMALL LETTER S WITH COMMA BELOW + + "ȿ" // U+023F: LATIN SMALL LETTER S WITH SWASH TAIL + + "ʂ" // U+0282: LATIN SMALL LETTER S WITH HOOK + + "ᵴ" // U+1D74: LATIN SMALL LETTER S WITH MIDDLE TILDE + + "ᶊ" // U+1D8A: LATIN SMALL LETTER S WITH PALATAL HOOK + + "ṡ" // U+1E61: LATIN SMALL LETTER S WITH DOT ABOVE + + "ṣ" // U+1E63: LATIN SMALL LETTER S WITH DOT BELOW + + "ṥ" // U+1E65: LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE + + "ṧ" // U+1E67: LATIN SMALL LETTER S WITH CARON AND DOT ABOVE + + "ṩ" // U+1E69: LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE + + "ẜ" // U+1E9C: LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE + + "ẝ" // U+1E9D: LATIN SMALL LETTER LONG S WITH HIGH STROKE + + "ⓢ" // U+24E2: CIRCLED LATIN SMALL LETTER S + + "Ꞅ" // U+A784: LATIN CAPITAL LETTER INSULAR S + + "s" // U+FF53: FULLWIDTH LATIN SMALL LETTER S + ,"s", // Folded result + + "ẞ" // U+1E9E: LATIN CAPITAL LETTER SHARP S + ,"SS", // Folded result + + "⒮" // U+24AE: PARENTHESIZED LATIN SMALL LETTER S + ,"(s)", // Folded result + + "ß" // U+00DF: LATIN SMALL LETTER SHARP S + ,"ss", // Folded result + + "st" // U+FB06: LATIN SMALL LIGATURE ST + ,"st", // Folded result + + "Ţ" // U+0162: LATIN CAPITAL LETTER T WITH CEDILLA + + "Ť" // U+0164: LATIN CAPITAL LETTER T WITH CARON + + "Ŧ" // U+0166: LATIN CAPITAL LETTER T WITH STROKE + + "Ƭ" // U+01AC: LATIN CAPITAL LETTER T WITH HOOK + + "Ʈ" // U+01AE: LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + + "Ț" // U+021A: LATIN CAPITAL LETTER T WITH COMMA BELOW + + "Ⱦ" // U+023E: LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + + "ᴛ" // U+1D1B: LATIN LETTER SMALL CAPITAL T + + "Ṫ" // U+1E6A: LATIN CAPITAL LETTER T WITH DOT ABOVE + + "Ṭ" // U+1E6C: LATIN CAPITAL LETTER T WITH DOT BELOW + + "Ṯ" // U+1E6E: LATIN CAPITAL LETTER T WITH 
LINE BELOW + + "Ṱ" // U+1E70: LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + + "Ⓣ" // U+24C9: CIRCLED LATIN CAPITAL LETTER T + + "Ꞇ" // U+A786: LATIN CAPITAL LETTER INSULAR T + + "T" // U+FF34: FULLWIDTH LATIN CAPITAL LETTER T + ,"T", // Folded result + + "ţ" // U+0163: LATIN SMALL LETTER T WITH CEDILLA + + "ť" // U+0165: LATIN SMALL LETTER T WITH CARON + + "ŧ" // U+0167: LATIN SMALL LETTER T WITH STROKE + + "ƫ" // U+01AB: LATIN SMALL LETTER T WITH PALATAL HOOK + + "ƭ" // U+01AD: LATIN SMALL LETTER T WITH HOOK + + "ț" // U+021B: LATIN SMALL LETTER T WITH COMMA BELOW + + "ȶ" // U+0236: LATIN SMALL LETTER T WITH CURL + + "ʇ" // U+0287: LATIN SMALL LETTER TURNED T + + "ʈ" // U+0288: LATIN SMALL LETTER T WITH RETROFLEX HOOK + + "ᵵ" // U+1D75: LATIN SMALL LETTER T WITH MIDDLE TILDE + + "ṫ" // U+1E6B: LATIN SMALL LETTER T WITH DOT ABOVE + + "ṭ" // U+1E6D: LATIN SMALL LETTER T WITH DOT BELOW + + "ṯ" // U+1E6F: LATIN SMALL LETTER T WITH LINE BELOW + + "ṱ" // U+1E71: LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW + + "ẗ" // U+1E97: LATIN SMALL LETTER T WITH DIAERESIS + + "ⓣ" // U+24E3: CIRCLED LATIN SMALL LETTER T + + "ⱦ" // U+2C66: LATIN SMALL LETTER T WITH DIAGONAL STROKE + + "t" // U+FF54: FULLWIDTH LATIN SMALL LETTER T + ,"t", // Folded result + + "Þ" // U+00DE: LATIN CAPITAL LETTER THORN + + "Ꝧ" // U+A766: LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER + ,"TH", // Folded result + + "Ꜩ" // U+A728: LATIN CAPITAL LETTER TZ + ,"TZ", // Folded result + + "⒯" // U+24AF: PARENTHESIZED LATIN SMALL LETTER T + ,"(t)", // Folded result + + "ʨ" // U+02A8: LATIN SMALL LETTER TC DIGRAPH WITH CURL + ,"tc", // Folded result + + "þ" // U+00FE: LATIN SMALL LETTER THORN + + "ᵺ" // U+1D7A: LATIN SMALL LETTER TH WITH STRIKETHROUGH + + "ꝧ" // U+A767: LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER + ,"th", // Folded result + + "ʦ" // U+02A6: LATIN SMALL LETTER TS DIGRAPH + ,"ts", // Folded result + + "ꜩ" // U+A729: LATIN SMALL LETTER TZ + ,"tz", // Folded result + + "Ù" // 
U+00D9: LATIN CAPITAL LETTER U WITH GRAVE + + "Ú" // U+00DA: LATIN CAPITAL LETTER U WITH ACUTE + + "Û" // U+00DB: LATIN CAPITAL LETTER U WITH CIRCUMFLEX + + "Ü" // U+00DC: LATIN CAPITAL LETTER U WITH DIAERESIS + + "Ũ" // U+0168: LATIN CAPITAL LETTER U WITH TILDE + + "Ū" // U+016A: LATIN CAPITAL LETTER U WITH MACRON + + "Ŭ" // U+016C: LATIN CAPITAL LETTER U WITH BREVE + + "Ů" // U+016E: LATIN CAPITAL LETTER U WITH RING ABOVE + + "Ű" // U+0170: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + + "Ų" // U+0172: LATIN CAPITAL LETTER U WITH OGONEK + + "Ư" // U+01AF: LATIN CAPITAL LETTER U WITH HORN + + "Ǔ" // U+01D3: LATIN CAPITAL LETTER U WITH CARON + + "Ǖ" // U+01D5: LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON + + "Ǘ" // U+01D7: LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + + "Ǚ" // U+01D9: LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON + + "Ǜ" // U+01DB: LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + + "Ȕ" // U+0214: LATIN CAPITAL LETTER U WITH DOUBLE GRAVE + + "Ȗ" // U+0216: LATIN CAPITAL LETTER U WITH INVERTED BREVE + + "Ʉ" // U+0244: LATIN CAPITAL LETTER U BAR + + "ᴜ" // U+1D1C: LATIN LETTER SMALL CAPITAL U + + "ᵾ" // U+1D7E: LATIN SMALL CAPITAL LETTER U WITH STROKE + + "Ṳ" // U+1E72: LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + + "Ṵ" // U+1E74: LATIN CAPITAL LETTER U WITH TILDE BELOW + + "Ṷ" // U+1E76: LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + + "Ṹ" // U+1E78: LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + + "Ṻ" // U+1E7A: LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + + "Ụ" // U+1EE4: LATIN CAPITAL LETTER U WITH DOT BELOW + + "Ủ" // U+1EE6: LATIN CAPITAL LETTER U WITH HOOK ABOVE + + "Ứ" // U+1EE8: LATIN CAPITAL LETTER U WITH HORN AND ACUTE + + "Ừ" // U+1EEA: LATIN CAPITAL LETTER U WITH HORN AND GRAVE + + "Ử" // U+1EEC: LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + + "Ữ" // U+1EEE: LATIN CAPITAL LETTER U WITH HORN AND TILDE + + "Ự" // U+1EF0: LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + + "Ⓤ" // U+24CA: CIRCLED LATIN CAPITAL LETTER U + + 
"U" // U+FF35: FULLWIDTH LATIN CAPITAL LETTER U + ,"U", // Folded result + + "ù" // U+00F9: LATIN SMALL LETTER U WITH GRAVE + + "ú" // U+00FA: LATIN SMALL LETTER U WITH ACUTE + + "û" // U+00FB: LATIN SMALL LETTER U WITH CIRCUMFLEX + + "ü" // U+00FC: LATIN SMALL LETTER U WITH DIAERESIS + + "ũ" // U+0169: LATIN SMALL LETTER U WITH TILDE + + "ū" // U+016B: LATIN SMALL LETTER U WITH MACRON + + "ŭ" // U+016D: LATIN SMALL LETTER U WITH BREVE + + "ů" // U+016F: LATIN SMALL LETTER U WITH RING ABOVE + + "ű" // U+0171: LATIN SMALL LETTER U WITH DOUBLE ACUTE + + "ų" // U+0173: LATIN SMALL LETTER U WITH OGONEK + + "ư" // U+01B0: LATIN SMALL LETTER U WITH HORN + + "ǔ" // U+01D4: LATIN SMALL LETTER U WITH CARON + + "ǖ" // U+01D6: LATIN SMALL LETTER U WITH DIAERESIS AND MACRON + + "ǘ" // U+01D8: LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE + + "ǚ" // U+01DA: LATIN SMALL LETTER U WITH DIAERESIS AND CARON + + "ǜ" // U+01DC: LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE + + "ȕ" // U+0215: LATIN SMALL LETTER U WITH DOUBLE GRAVE + + "ȗ" // U+0217: LATIN SMALL LETTER U WITH INVERTED BREVE + + "ʉ" // U+0289: LATIN SMALL LETTER U BAR + + "ᵤ" // U+1D64: LATIN SUBSCRIPT SMALL LETTER U + + "ᶙ" // U+1D99: LATIN SMALL LETTER U WITH RETROFLEX HOOK + + "ṳ" // U+1E73: LATIN SMALL LETTER U WITH DIAERESIS BELOW + + "ṵ" // U+1E75: LATIN SMALL LETTER U WITH TILDE BELOW + + "ṷ" // U+1E77: LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW + + "ṹ" // U+1E79: LATIN SMALL LETTER U WITH TILDE AND ACUTE + + "ṻ" // U+1E7B: LATIN SMALL LETTER U WITH MACRON AND DIAERESIS + + "ụ" // U+1EE5: LATIN SMALL LETTER U WITH DOT BELOW + + "ủ" // U+1EE7: LATIN SMALL LETTER U WITH HOOK ABOVE + + "ứ" // U+1EE9: LATIN SMALL LETTER U WITH HORN AND ACUTE + + "ừ" // U+1EEB: LATIN SMALL LETTER U WITH HORN AND GRAVE + + "ử" // U+1EED: LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE + + "ữ" // U+1EEF: LATIN SMALL LETTER U WITH HORN AND TILDE + + "ự" // U+1EF1: LATIN SMALL LETTER U WITH HORN AND DOT BELOW + + "ⓤ" // U+24E4: CIRCLED LATIN 
SMALL LETTER U + + "u" // U+FF55: FULLWIDTH LATIN SMALL LETTER U + ,"u", // Folded result + + "⒰" // U+24B0: PARENTHESIZED LATIN SMALL LETTER U + ,"(u)", // Folded result + + "ᵫ" // U+1D6B: LATIN SMALL LETTER UE + ,"ue", // Folded result + + "Ʋ" // U+01B2: LATIN CAPITAL LETTER V WITH HOOK + + "Ʌ" // U+0245: LATIN CAPITAL LETTER TURNED V + + "ᴠ" // U+1D20: LATIN LETTER SMALL CAPITAL V + + "Ṽ" // U+1E7C: LATIN CAPITAL LETTER V WITH TILDE + + "Ṿ" // U+1E7E: LATIN CAPITAL LETTER V WITH DOT BELOW + + "Ỽ" // U+1EFC: LATIN CAPITAL LETTER MIDDLE-WELSH V + + "Ⓥ" // U+24CB: CIRCLED LATIN CAPITAL LETTER V + + "Ꝟ" // U+A75E: LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + + "Ꝩ" // U+A768: LATIN CAPITAL LETTER VEND + + "V" // U+FF36: FULLWIDTH LATIN CAPITAL LETTER V + ,"V", // Folded result + + "ʋ" // U+028B: LATIN SMALL LETTER V WITH HOOK + + "ʌ" // U+028C: LATIN SMALL LETTER TURNED V + + "ᵥ" // U+1D65: LATIN SUBSCRIPT SMALL LETTER V + + "ᶌ" // U+1D8C: LATIN SMALL LETTER V WITH PALATAL HOOK + + "ṽ" // U+1E7D: LATIN SMALL LETTER V WITH TILDE + + "ṿ" // U+1E7F: LATIN SMALL LETTER V WITH DOT BELOW + + "ⓥ" // U+24E5: CIRCLED LATIN SMALL LETTER V + + "ⱱ" // U+2C71: LATIN SMALL LETTER V WITH RIGHT HOOK + + "ⱴ" // U+2C74: LATIN SMALL LETTER V WITH CURL + + "ꝟ" // U+A75F: LATIN SMALL LETTER V WITH DIAGONAL STROKE + + "v" // U+FF56: FULLWIDTH LATIN SMALL LETTER V + ,"v", // Folded result + + "Ꝡ" // U+A760: LATIN CAPITAL LETTER VY + ,"VY", // Folded result + + "⒱" // U+24B1: PARENTHESIZED LATIN SMALL LETTER V + ,"(v)", // Folded result + + "ꝡ" // U+A761: LATIN SMALL LETTER VY + ,"vy", // Folded result + + "Ŵ" // U+0174: LATIN CAPITAL LETTER W WITH CIRCUMFLEX + + "Ƿ" // U+01F7: LATIN CAPITAL LETTER WYNN + + "ᴡ" // U+1D21: LATIN LETTER SMALL CAPITAL W + + "Ẁ" // U+1E80: LATIN CAPITAL LETTER W WITH GRAVE + + "Ẃ" // U+1E82: LATIN CAPITAL LETTER W WITH ACUTE + + "Ẅ" // U+1E84: LATIN CAPITAL LETTER W WITH DIAERESIS + + "Ẇ" // U+1E86: LATIN CAPITAL LETTER W WITH DOT ABOVE + + "Ẉ" // U+1E88: 
LATIN CAPITAL LETTER W WITH DOT BELOW + + "Ⓦ" // U+24CC: CIRCLED LATIN CAPITAL LETTER W + + "Ⱳ" // U+2C72: LATIN CAPITAL LETTER W WITH HOOK + + "W" // U+FF37: FULLWIDTH LATIN CAPITAL LETTER W + ,"W", // Folded result + + "ŵ" // U+0175: LATIN SMALL LETTER W WITH CIRCUMFLEX + + "ƿ" // U+01BF: LATIN LETTER WYNN + + "ʍ" // U+028D: LATIN SMALL LETTER TURNED W + + "ẁ" // U+1E81: LATIN SMALL LETTER W WITH GRAVE + + "ẃ" // U+1E83: LATIN SMALL LETTER W WITH ACUTE + + "ẅ" // U+1E85: LATIN SMALL LETTER W WITH DIAERESIS + + "ẇ" // U+1E87: LATIN SMALL LETTER W WITH DOT ABOVE + + "ẉ" // U+1E89: LATIN SMALL LETTER W WITH DOT BELOW + + "ẘ" // U+1E98: LATIN SMALL LETTER W WITH RING ABOVE + + "ⓦ" // U+24E6: CIRCLED LATIN SMALL LETTER W + + "ⱳ" // U+2C73: LATIN SMALL LETTER W WITH HOOK + + "w" // U+FF57: FULLWIDTH LATIN SMALL LETTER W + ,"w", // Folded result + + "⒲" // U+24B2: PARENTHESIZED LATIN SMALL LETTER W + ,"(w)", // Folded result + + "Ẋ" // U+1E8A: LATIN CAPITAL LETTER X WITH DOT ABOVE + + "Ẍ" // U+1E8C: LATIN CAPITAL LETTER X WITH DIAERESIS + + "Ⓧ" // U+24CD: CIRCLED LATIN CAPITAL LETTER X + + "X" // U+FF38: FULLWIDTH LATIN CAPITAL LETTER X + ,"X", // Folded result + + "ᶍ" // U+1D8D: LATIN SMALL LETTER X WITH PALATAL HOOK + + "ẋ" // U+1E8B: LATIN SMALL LETTER X WITH DOT ABOVE + + "ẍ" // U+1E8D: LATIN SMALL LETTER X WITH DIAERESIS + + "ₓ" // U+2093: LATIN SUBSCRIPT SMALL LETTER X + + "ⓧ" // U+24E7: CIRCLED LATIN SMALL LETTER X + + "x" // U+FF58: FULLWIDTH LATIN SMALL LETTER X + ,"x", // Folded result + + "⒳" // U+24B3: PARENTHESIZED LATIN SMALL LETTER X + ,"(x)", // Folded result + + "Ý" // U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE + + "Ŷ" // U+0176: LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + + "Ÿ" // U+0178: LATIN CAPITAL LETTER Y WITH DIAERESIS + + "Ƴ" // U+01B3: LATIN CAPITAL LETTER Y WITH HOOK + + "Ȳ" // U+0232: LATIN CAPITAL LETTER Y WITH MACRON + + "Ɏ" // U+024E: LATIN CAPITAL LETTER Y WITH STROKE + + "ʏ" // U+028F: LATIN LETTER SMALL CAPITAL Y + + "Ẏ" // U+1E8E: LATIN 
CAPITAL LETTER Y WITH DOT ABOVE + + "Ỳ" // U+1EF2: LATIN CAPITAL LETTER Y WITH GRAVE + + "Ỵ" // U+1EF4: LATIN CAPITAL LETTER Y WITH DOT BELOW + + "Ỷ" // U+1EF6: LATIN CAPITAL LETTER Y WITH HOOK ABOVE + + "Ỹ" // U+1EF8: LATIN CAPITAL LETTER Y WITH TILDE + + "Ỿ" // U+1EFE: LATIN CAPITAL LETTER Y WITH LOOP + + "Ⓨ" // U+24CE: CIRCLED LATIN CAPITAL LETTER Y + + "Y" // U+FF39: FULLWIDTH LATIN CAPITAL LETTER Y + ,"Y", // Folded result + + "ý" // U+00FD: LATIN SMALL LETTER Y WITH ACUTE + + "ÿ" // U+00FF: LATIN SMALL LETTER Y WITH DIAERESIS + + "ŷ" // U+0177: LATIN SMALL LETTER Y WITH CIRCUMFLEX + + "ƴ" // U+01B4: LATIN SMALL LETTER Y WITH HOOK + + "ȳ" // U+0233: LATIN SMALL LETTER Y WITH MACRON + + "ɏ" // U+024F: LATIN SMALL LETTER Y WITH STROKE + + "ʎ" // U+028E: LATIN SMALL LETTER TURNED Y + + "ẏ" // U+1E8F: LATIN SMALL LETTER Y WITH DOT ABOVE + + "ẙ" // U+1E99: LATIN SMALL LETTER Y WITH RING ABOVE + + "ỳ" // U+1EF3: LATIN SMALL LETTER Y WITH GRAVE + + "ỵ" // U+1EF5: LATIN SMALL LETTER Y WITH DOT BELOW + + "ỷ" // U+1EF7: LATIN SMALL LETTER Y WITH HOOK ABOVE + + "ỹ" // U+1EF9: LATIN SMALL LETTER Y WITH TILDE + + "ỿ" // U+1EFF: LATIN SMALL LETTER Y WITH LOOP + + "ⓨ" // U+24E8: CIRCLED LATIN SMALL LETTER Y + + "y" // U+FF59: FULLWIDTH LATIN SMALL LETTER Y + ,"y", // Folded result + + "⒴" // U+24B4: PARENTHESIZED LATIN SMALL LETTER Y + ,"(y)", // Folded result + + "Ź" // U+0179: LATIN CAPITAL LETTER Z WITH ACUTE + + "Ż" // U+017B: LATIN CAPITAL LETTER Z WITH DOT ABOVE + + "Ž" // U+017D: LATIN CAPITAL LETTER Z WITH CARON + + "Ƶ" // U+01B5: LATIN CAPITAL LETTER Z WITH STROKE + + "Ȝ" // U+021C: LATIN CAPITAL LETTER YOGH + + "Ȥ" // U+0224: LATIN CAPITAL LETTER Z WITH HOOK + + "ᴢ" // U+1D22: LATIN LETTER SMALL CAPITAL Z + + "Ẑ" // U+1E90: LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + + "Ẓ" // U+1E92: LATIN CAPITAL LETTER Z WITH DOT BELOW + + "Ẕ" // U+1E94: LATIN CAPITAL LETTER Z WITH LINE BELOW + + "Ⓩ" // U+24CF: CIRCLED LATIN CAPITAL LETTER Z + + "Ⱬ" // U+2C6B: LATIN CAPITAL LETTER Z 
WITH DESCENDER + + "Ꝣ" // U+A762: LATIN CAPITAL LETTER VISIGOTHIC Z + + "Z" // U+FF3A: FULLWIDTH LATIN CAPITAL LETTER Z + ,"Z", // Folded result + + "ź" // U+017A: LATIN SMALL LETTER Z WITH ACUTE + + "ż" // U+017C: LATIN SMALL LETTER Z WITH DOT ABOVE + + "ž" // U+017E: LATIN SMALL LETTER Z WITH CARON + + "ƶ" // U+01B6: LATIN SMALL LETTER Z WITH STROKE + + "ȝ" // U+021D: LATIN SMALL LETTER YOGH + + "ȥ" // U+0225: LATIN SMALL LETTER Z WITH HOOK + + "ɀ" // U+0240: LATIN SMALL LETTER Z WITH SWASH TAIL + + "ʐ" // U+0290: LATIN SMALL LETTER Z WITH RETROFLEX HOOK + + "ʑ" // U+0291: LATIN SMALL LETTER Z WITH CURL + + "ᵶ" // U+1D76: LATIN SMALL LETTER Z WITH MIDDLE TILDE + + "ᶎ" // U+1D8E: LATIN SMALL LETTER Z WITH PALATAL HOOK + + "ẑ" // U+1E91: LATIN SMALL LETTER Z WITH CIRCUMFLEX + + "ẓ" // U+1E93: LATIN SMALL LETTER Z WITH DOT BELOW + + "ẕ" // U+1E95: LATIN SMALL LETTER Z WITH LINE BELOW + + "ⓩ" // U+24E9: CIRCLED LATIN SMALL LETTER Z + + "ⱬ" // U+2C6C: LATIN SMALL LETTER Z WITH DESCENDER + + "ꝣ" // U+A763: LATIN SMALL LETTER VISIGOTHIC Z + + "z" // U+FF5A: FULLWIDTH LATIN SMALL LETTER Z + ,"z", // Folded result + + "⒵" // U+24B5: PARENTHESIZED LATIN SMALL LETTER Z + ,"(z)", // Folded result + + "⁰" // U+2070: SUPERSCRIPT ZERO + + "₀" // U+2080: SUBSCRIPT ZERO + + "⓪" // U+24EA: CIRCLED DIGIT ZERO + + "⓿" // U+24FF: NEGATIVE CIRCLED DIGIT ZERO + + "0" // U+FF10: FULLWIDTH DIGIT ZERO + ,"0", // Folded result + + "¹" // U+00B9: SUPERSCRIPT ONE + + "₁" // U+2081: SUBSCRIPT ONE + + "①" // U+2460: CIRCLED DIGIT ONE + + "⓵" // U+24F5: DOUBLE CIRCLED DIGIT ONE + + "❶" // U+2776: DINGBAT NEGATIVE CIRCLED DIGIT ONE + + "➀" // U+2780: DINGBAT CIRCLED SANS-SERIF DIGIT ONE + + "➊" // U+278A: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE + + "1" // U+FF11: FULLWIDTH DIGIT ONE + ,"1", // Folded result + + "⒈" // U+2488: DIGIT ONE FULL STOP + ,"1.", // Folded result + + "⑴" // U+2474: PARENTHESIZED DIGIT ONE + ,"(1)", // Folded result + + "²" // U+00B2: SUPERSCRIPT TWO + + "₂" // 
U+2082: SUBSCRIPT TWO + + "②" // U+2461: CIRCLED DIGIT TWO + + "⓶" // U+24F6: DOUBLE CIRCLED DIGIT TWO + + "❷" // U+2777: DINGBAT NEGATIVE CIRCLED DIGIT TWO + + "➁" // U+2781: DINGBAT CIRCLED SANS-SERIF DIGIT TWO + + "➋" // U+278B: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO + + "2" // U+FF12: FULLWIDTH DIGIT TWO + ,"2", // Folded result + + "⒉" // U+2489: DIGIT TWO FULL STOP + ,"2.", // Folded result + + "⑵" // U+2475: PARENTHESIZED DIGIT TWO + ,"(2)", // Folded result + + "³" // U+00B3: SUPERSCRIPT THREE + + "₃" // U+2083: SUBSCRIPT THREE + + "③" // U+2462: CIRCLED DIGIT THREE + + "⓷" // U+24F7: DOUBLE CIRCLED DIGIT THREE + + "❸" // U+2778: DINGBAT NEGATIVE CIRCLED DIGIT THREE + + "➂" // U+2782: DINGBAT CIRCLED SANS-SERIF DIGIT THREE + + "➌" // U+278C: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE + + "3" // U+FF13: FULLWIDTH DIGIT THREE + ,"3", // Folded result + + "⒊" // U+248A: DIGIT THREE FULL STOP + ,"3.", // Folded result + + "⑶" // U+2476: PARENTHESIZED DIGIT THREE + ,"(3)", // Folded result + + "⁴" // U+2074: SUPERSCRIPT FOUR + + "₄" // U+2084: SUBSCRIPT FOUR + + "④" // U+2463: CIRCLED DIGIT FOUR + + "⓸" // U+24F8: DOUBLE CIRCLED DIGIT FOUR + + "❹" // U+2779: DINGBAT NEGATIVE CIRCLED DIGIT FOUR + + "➃" // U+2783: DINGBAT CIRCLED SANS-SERIF DIGIT FOUR + + "➍" // U+278D: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR + + "4" // U+FF14: FULLWIDTH DIGIT FOUR + ,"4", // Folded result + + "⒋" // U+248B: DIGIT FOUR FULL STOP + ,"4.", // Folded result + + "⑷" // U+2477: PARENTHESIZED DIGIT FOUR + ,"(4)", // Folded result + + "⁵" // U+2075: SUPERSCRIPT FIVE + + "₅" // U+2085: SUBSCRIPT FIVE + + "⑤" // U+2464: CIRCLED DIGIT FIVE + + "⓹" // U+24F9: DOUBLE CIRCLED DIGIT FIVE + + "❺" // U+277A: DINGBAT NEGATIVE CIRCLED DIGIT FIVE + + "➄" // U+2784: DINGBAT CIRCLED SANS-SERIF DIGIT FIVE + + "➎" // U+278E: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE + + "5" // U+FF15: FULLWIDTH DIGIT FIVE + ,"5", // Folded result + + "⒌" // U+248C: DIGIT FIVE FULL STOP + ,"5.", 
// Folded result + + "⑸" // U+2478: PARENTHESIZED DIGIT FIVE + ,"(5)", // Folded result + + "⁶" // U+2076: SUPERSCRIPT SIX + + "₆" // U+2086: SUBSCRIPT SIX + + "⑥" // U+2465: CIRCLED DIGIT SIX + + "⓺" // U+24FA: DOUBLE CIRCLED DIGIT SIX + + "❻" // U+277B: DINGBAT NEGATIVE CIRCLED DIGIT SIX + + "➅" // U+2785: DINGBAT CIRCLED SANS-SERIF DIGIT SIX + + "➏" // U+278F: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX + + "6" // U+FF16: FULLWIDTH DIGIT SIX + ,"6", // Folded result + + "⒍" // U+248D: DIGIT SIX FULL STOP + ,"6.", // Folded result + + "⑹" // U+2479: PARENTHESIZED DIGIT SIX + ,"(6)", // Folded result + + "⁷" // U+2077: SUPERSCRIPT SEVEN + + "₇" // U+2087: SUBSCRIPT SEVEN + + "⑦" // U+2466: CIRCLED DIGIT SEVEN + + "⓻" // U+24FB: DOUBLE CIRCLED DIGIT SEVEN + + "❼" // U+277C: DINGBAT NEGATIVE CIRCLED DIGIT SEVEN + + "➆" // U+2786: DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN + + "➐" // U+2790: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN + + "7" // U+FF17: FULLWIDTH DIGIT SEVEN + ,"7", // Folded result + + "⒎" // U+248E: DIGIT SEVEN FULL STOP + ,"7.", // Folded result + + "⑺" // U+247A: PARENTHESIZED DIGIT SEVEN + ,"(7)", // Folded result + + "⁸" // U+2078: SUPERSCRIPT EIGHT + + "₈" // U+2088: SUBSCRIPT EIGHT + + "⑧" // U+2467: CIRCLED DIGIT EIGHT + + "⓼" // U+24FC: DOUBLE CIRCLED DIGIT EIGHT + + "❽" // U+277D: DINGBAT NEGATIVE CIRCLED DIGIT EIGHT + + "➇" // U+2787: DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT + + "➑" // U+2791: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT + + "8" // U+FF18: FULLWIDTH DIGIT EIGHT + ,"8", // Folded result + + "⒏" // U+248F: DIGIT EIGHT FULL STOP + ,"8.", // Folded result + + "⑻" // U+247B: PARENTHESIZED DIGIT EIGHT + ,"(8)", // Folded result + + "⁹" // U+2079: SUPERSCRIPT NINE + + "₉" // U+2089: SUBSCRIPT NINE + + "⑨" // U+2468: CIRCLED DIGIT NINE + + "⓽" // U+24FD: DOUBLE CIRCLED DIGIT NINE + + "❾" // U+277E: DINGBAT NEGATIVE CIRCLED DIGIT NINE + + "➈" // U+2788: DINGBAT CIRCLED SANS-SERIF DIGIT NINE + + "➒" // U+2792: DINGBAT NEGATIVE 
CIRCLED SANS-SERIF DIGIT NINE + + "9" // U+FF19: FULLWIDTH DIGIT NINE + ,"9", // Folded result + + "⒐" // U+2490: DIGIT NINE FULL STOP + ,"9.", // Folded result + + "⑼" // U+247C: PARENTHESIZED DIGIT NINE + ,"(9)", // Folded result + + "⑩" // U+2469: CIRCLED NUMBER TEN + + "⓾" // U+24FE: DOUBLE CIRCLED NUMBER TEN + + "❿" // U+277F: DINGBAT NEGATIVE CIRCLED NUMBER TEN + + "➉" // U+2789: DINGBAT CIRCLED SANS-SERIF NUMBER TEN + + "➓" // U+2793: DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN + ,"10", // Folded result + + "⒑" // U+2491: NUMBER TEN FULL STOP + ,"10.", // Folded result + + "⑽" // U+247D: PARENTHESIZED NUMBER TEN + ,"(10)", // Folded result + + "⑪" // U+246A: CIRCLED NUMBER ELEVEN + + "⓫" // U+24EB: NEGATIVE CIRCLED NUMBER ELEVEN + ,"11", // Folded result + + "⒒" // U+2492: NUMBER ELEVEN FULL STOP + ,"11.", // Folded result + + "⑾" // U+247E: PARENTHESIZED NUMBER ELEVEN + ,"(11)", // Folded result + + "⑫" // U+246B: CIRCLED NUMBER TWELVE + + "⓬" // U+24EC: NEGATIVE CIRCLED NUMBER TWELVE + ,"12", // Folded result + + "⒓" // U+2493: NUMBER TWELVE FULL STOP + ,"12.", // Folded result + + "⑿" // U+247F: PARENTHESIZED NUMBER TWELVE + ,"(12)", // Folded result + + "⑬" // U+246C: CIRCLED NUMBER THIRTEEN + + "⓭" // U+24ED: NEGATIVE CIRCLED NUMBER THIRTEEN + ,"13", // Folded result + + "⒔" // U+2494: NUMBER THIRTEEN FULL STOP + ,"13.", // Folded result + + "⒀" // U+2480: PARENTHESIZED NUMBER THIRTEEN + ,"(13)", // Folded result + + "⑭" // U+246D: CIRCLED NUMBER FOURTEEN + + "⓮" // U+24EE: NEGATIVE CIRCLED NUMBER FOURTEEN + ,"14", // Folded result + + "⒕" // U+2495: NUMBER FOURTEEN FULL STOP + ,"14.", // Folded result + + "⒁" // U+2481: PARENTHESIZED NUMBER FOURTEEN + ,"(14)", // Folded result + + "⑮" // U+246E: CIRCLED NUMBER FIFTEEN + + "⓯" // U+24EF: NEGATIVE CIRCLED NUMBER FIFTEEN + ,"15", // Folded result + + "⒖" // U+2496: NUMBER FIFTEEN FULL STOP + ,"15.", // Folded result + + "⒂" // U+2482: PARENTHESIZED NUMBER FIFTEEN + ,"(15)", // Folded result + + "⑯" // 
U+246F: CIRCLED NUMBER SIXTEEN + + "⓰" // U+24F0: NEGATIVE CIRCLED NUMBER SIXTEEN + ,"16", // Folded result + + "⒗" // U+2497: NUMBER SIXTEEN FULL STOP + ,"16.", // Folded result + + "⒃" // U+2483: PARENTHESIZED NUMBER SIXTEEN + ,"(16)", // Folded result + + "⑰" // U+2470: CIRCLED NUMBER SEVENTEEN + + "⓱" // U+24F1: NEGATIVE CIRCLED NUMBER SEVENTEEN + ,"17", // Folded result + + "⒘" // U+2498: NUMBER SEVENTEEN FULL STOP + ,"17.", // Folded result + + "⒄" // U+2484: PARENTHESIZED NUMBER SEVENTEEN + ,"(17)", // Folded result + + "⑱" // U+2471: CIRCLED NUMBER EIGHTEEN + + "⓲" // U+24F2: NEGATIVE CIRCLED NUMBER EIGHTEEN + ,"18", // Folded result + + "⒙" // U+2499: NUMBER EIGHTEEN FULL STOP + ,"18.", // Folded result + + "⒅" // U+2485: PARENTHESIZED NUMBER EIGHTEEN + ,"(18)", // Folded result + + "⑲" // U+2472: CIRCLED NUMBER NINETEEN + + "⓳" // U+24F3: NEGATIVE CIRCLED NUMBER NINETEEN + ,"19", // Folded result + + "⒚" // U+249A: NUMBER NINETEEN FULL STOP + ,"19.", // Folded result + + "⒆" // U+2486: PARENTHESIZED NUMBER NINETEEN + ,"(19)", // Folded result + + "⑳" // U+2473: CIRCLED NUMBER TWENTY + + "⓴" // U+24F4: NEGATIVE CIRCLED NUMBER TWENTY + ,"20", // Folded result + + "⒛" // U+249B: NUMBER TWENTY FULL STOP + ,"20.", // Folded result + + "⒇" // U+2487: PARENTHESIZED NUMBER TWENTY + ,"(20)", // Folded result + + "«" // U+00AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + + "»" // U+00BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + + "“" // U+201C: LEFT DOUBLE QUOTATION MARK + + "”" // U+201D: RIGHT DOUBLE QUOTATION MARK + + "„" // U+201E: DOUBLE LOW-9 QUOTATION MARK + + "″" // U+2033: DOUBLE PRIME + + "‶" // U+2036: REVERSED DOUBLE PRIME + + "❝" // U+275D: HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT + + "❞" // U+275E: HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT + + "❮" // U+276E: HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT + + "❯" // U+276F: HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT + + """ // U+FF02: FULLWIDTH QUOTATION MARK + ,"\"", // Folded 
result + + "‘" // U+2018: LEFT SINGLE QUOTATION MARK + + "’" // U+2019: RIGHT SINGLE QUOTATION MARK + + "‚" // U+201A: SINGLE LOW-9 QUOTATION MARK + + "‛" // U+201B: SINGLE HIGH-REVERSED-9 QUOTATION MARK + + "′" // U+2032: PRIME + + "‵" // U+2035: REVERSED PRIME + + "‹" // U+2039: SINGLE LEFT-POINTING ANGLE QUOTATION MARK + + "›" // U+203A: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + + "❛" // U+275B: HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT + + "❜" // U+275C: HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT + + "'" // U+FF07: FULLWIDTH APOSTROPHE + ,"'", // Folded result + + "‐" // U+2010: HYPHEN + + "‑" // U+2011: NON-BREAKING HYPHEN + + "‒" // U+2012: FIGURE DASH + + "–" // U+2013: EN DASH + + "—" // U+2014: EM DASH + + "⁻" // U+207B: SUPERSCRIPT MINUS + + "₋" // U+208B: SUBSCRIPT MINUS + + "-" // U+FF0D: FULLWIDTH HYPHEN-MINUS + ,"-", // Folded result + + "⁅" // U+2045: LEFT SQUARE BRACKET WITH QUILL + + "❲" // U+2772: LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT + + "[" // U+FF3B: FULLWIDTH LEFT SQUARE BRACKET + ,"[", // Folded result + + "⁆" // U+2046: RIGHT SQUARE BRACKET WITH QUILL + + "❳" // U+2773: LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT + + "]" // U+FF3D: FULLWIDTH RIGHT SQUARE BRACKET + ,"]", // Folded result + + "⁽" // U+207D: SUPERSCRIPT LEFT PARENTHESIS + + "₍" // U+208D: SUBSCRIPT LEFT PARENTHESIS + + "❨" // U+2768: MEDIUM LEFT PARENTHESIS ORNAMENT + + "❪" // U+276A: MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT + + "(" // U+FF08: FULLWIDTH LEFT PARENTHESIS + ,"(", // Folded result + + "⸨" // U+2E28: LEFT DOUBLE PARENTHESIS + ,"((", // Folded result + + "⁾" // U+207E: SUPERSCRIPT RIGHT PARENTHESIS + + "₎" // U+208E: SUBSCRIPT RIGHT PARENTHESIS + + "❩" // U+2769: MEDIUM RIGHT PARENTHESIS ORNAMENT + + "❫" // U+276B: MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT + + ")" // U+FF09: FULLWIDTH RIGHT PARENTHESIS + ,")", // Folded result + + "⸩" // U+2E29: RIGHT DOUBLE PARENTHESIS + ,"))", // Folded result + + "❬" // U+276C: MEDIUM LEFT-POINTING ANGLE 
BRACKET ORNAMENT + + "❰" // U+2770: HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT + + "<" // U+FF1C: FULLWIDTH LESS-THAN SIGN + ,"<", // Folded result + + "❭" // U+276D: MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT + + "❱" // U+2771: HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT + + ">" // U+FF1E: FULLWIDTH GREATER-THAN SIGN + ,">", // Folded result + + "❴" // U+2774: MEDIUM LEFT CURLY BRACKET ORNAMENT + + "{" // U+FF5B: FULLWIDTH LEFT CURLY BRACKET + ,"{", // Folded result + + "❵" // U+2775: MEDIUM RIGHT CURLY BRACKET ORNAMENT + + "}" // U+FF5D: FULLWIDTH RIGHT CURLY BRACKET + ,"}", // Folded result + + "⁺" // U+207A: SUPERSCRIPT PLUS SIGN + + "₊" // U+208A: SUBSCRIPT PLUS SIGN + + "+" // U+FF0B: FULLWIDTH PLUS SIGN + ,"+", // Folded result + + "⁼" // U+207C: SUPERSCRIPT EQUALS SIGN + + "₌" // U+208C: SUBSCRIPT EQUALS SIGN + + "=" // U+FF1D: FULLWIDTH EQUALS SIGN + ,"=", // Folded result + + "!" // U+FF01: FULLWIDTH EXCLAMATION MARK + ,"!", // Folded result + + "‼" // U+203C: DOUBLE EXCLAMATION MARK + ,"!!", // Folded result + + "⁉" // U+2049: EXCLAMATION QUESTION MARK + ,"!?", // Folded result + + "#" // U+FF03: FULLWIDTH NUMBER SIGN + ,"#", // Folded result + + "$" // U+FF04: FULLWIDTH DOLLAR SIGN + ,"$", // Folded result + + "⁒" // U+2052: COMMERCIAL MINUS SIGN + + "%" // U+FF05: FULLWIDTH PERCENT SIGN + ,"%", // Folded result + + "&" // U+FF06: FULLWIDTH AMPERSAND + ,"&", // Folded result + + "⁎" // U+204E: LOW ASTERISK + + "*" // U+FF0A: FULLWIDTH ASTERISK + ,"*", // Folded result + + "," // U+FF0C: FULLWIDTH COMMA + ,",", // Folded result + + "." // U+FF0E: FULLWIDTH FULL STOP + ,".", // Folded result + + "⁄" // U+2044: FRACTION SLASH + + "/" // U+FF0F: FULLWIDTH SOLIDUS + ,"/", // Folded result + + ":" // U+FF1A: FULLWIDTH COLON + ,":", // Folded result + + "⁏" // U+204F: REVERSED SEMICOLON + + ";" // U+FF1B: FULLWIDTH SEMICOLON + ,";", // Folded result + + "?" 
// U+FF1F: FULLWIDTH QUESTION MARK + ,"?", // Folded result + + "⁇" // U+2047: DOUBLE QUESTION MARK + ,"??", // Folded result + + "⁈" // U+2048: QUESTION EXCLAMATION MARK + ,"?!", // Folded result + + "@" // U+FF20: FULLWIDTH COMMERCIAL AT + ,"@", // Folded result + + "\" // U+FF3C: FULLWIDTH REVERSE SOLIDUS + ,"\\", // Folded result + + "‸" // U+2038: CARET + + "^" // U+FF3E: FULLWIDTH CIRCUMFLEX ACCENT + ,"^", // Folded result + + "_" // U+FF3F: FULLWIDTH LOW LINE + ,"_", // Folded result + + "⁓" // U+2053: SWUNG DASH + + "~" // U+FF5E: FULLWIDTH TILDE + ,"~", // Folded result + }; + + // Construct input text and expected output tokens + List expectedOutputTokens = new ArrayList(); + StringBuffer inputText = new StringBuffer(); + for (int n = 0 ; n < foldings.length ; n += 2) { + if (n > 0) { + inputText.append(' '); // Space between tokens + } + inputText.append(foldings[n]); + + // Construct the expected output token: the ASCII string to fold to, + // duplicated as many times as the number of characters in the input text. 
+ StringBuffer expected = new StringBuffer(); + int numChars = foldings[n].length(); + for (int m = 0 ; m < numChars; ++m) { + expected.append(foldings[n + 1]); + } + expectedOutputTokens.add(expected.toString()); + } + + TokenStream stream = new WhitespaceTokenizer(new StringReader(inputText.toString())); + ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); + final Token reusableToken = new Token(); + Iterator expectedIter = expectedOutputTokens.iterator(); + while (expectedIter.hasNext()) { + assertEquals(expectedIter.next(), filter.next(reusableToken).term()); + } + assertNull(filter.next(reusableToken)); + } +} Property changes on: src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java ___________________________________________________________________ Name: svn:mime-type + text/plain Name: svn:eol-style + native Index: src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (revision 0) +++ src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java (revision 0) @@ -0,0 +1,2030 @@ +package org.apache.lucene.analysis; + +import org.apache.lucene.util.ArrayUtil; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This class converts alphabetic, numeric, and symbolic Unicode characters + * which are not in the first 128 ASCII characters (the "Basic Latin" Unicode + * block) into their ASCII equivalents, if one exists. + * + * Characters from the following Unicode blocks are converted; however, only + * those characters with reasonable ASCII alternatives are converted: + * + * + * + * See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode + * + * The set of character conversions supported by this class is a superset of + * those supported by Lucene's {@link ISOLatin1AccentFilter} which strips + * accents from Latin1 characters. For example, 'à' will be replaced by + * 'a'. + */ +public class ASCIIFoldingFilter extends TokenFilter { + /** + * Creates a folding filter over the given token stream, which is handed + * to the TokenFilter constructor. + * @param input The token stream whose terms are to be folded + */ + public ASCIIFoldingFilter(TokenStream input) + { + super(input); + } + + /** Scratch buffer that foldToASCII fills with the folded characters. */ + private char[] output = new char[512]; + /** Number of characters written to output by the last foldToASCII call. */ + private int outputPos; + + /** + * Fetches the next token from the input stream and, if its term contains + * any character at or above U+0080, replaces the term with the result of + * foldToASCII. Terms made up solely of characters below U+0080 are + * returned untouched. + * @param result The reusable token passed on to the input stream + * @return The (possibly folded) token, or null if the input stream + * returned null + */ + public Token next(Token result) + throws java.io.IOException + { + result = input.next(result); + + if (result != null) + { + final char[] buffer = result.termBuffer(); + final int length = result.termLength(); + + // If no characters actually require rewriting then we + // just return token as-is: + for(int i = 0 ; i < length ; ++i) { + final char c = buffer[i]; + if (c >= '\u0080') + { + foldToASCII(buffer, length); + result.setTermBuffer(output, 0, outputPos); + break; + } + } + return result; + } else { + return null; + } + } + + /** + * Converts characters above ASCII to their ASCII equivalents. For example, + * accents are removed from accented characters. 
+ * @param input The string to fold + * @param length The number of characters in the input string + */ + public void foldToASCII(char[] input, int length) + { + // Worst-case length required: + final int maxSizeNeeded = 4 * length; + if (output.length < maxSizeNeeded) { + output = new char[ArrayUtil.getNextSize(maxSizeNeeded)]; + } + + outputPos = 0; + + for (int pos = 0 ; pos < length ; ++pos) { + final char c = input[pos]; + + // Quick test: if it's not in range then just keep current character + if (c < '\u0080') { + output[outputPos++] = c; + } else { + switch (c) { + case '\u00C0': // À [LATIN CAPITAL LETTER A WITH GRAVE] + case '\u00C1': // Á [LATIN CAPITAL LETTER A WITH ACUTE] + case '\u00C2': //  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] + case '\u00C3': // à [LATIN CAPITAL LETTER A WITH TILDE] + case '\u00C4': // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] + case '\u00C5': // Å [LATIN CAPITAL LETTER A WITH RING ABOVE] + case '\u0100': // Ā [LATIN CAPITAL LETTER A WITH MACRON] + case '\u0102': // Ă [LATIN CAPITAL LETTER A WITH BREVE] + case '\u0104': // Ą [LATIN CAPITAL LETTER A WITH OGONEK] + case '\u018F': // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] + case '\u01CD': // Ǎ [LATIN CAPITAL LETTER A WITH CARON] + case '\u01DE': // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E0': // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FA': // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0200': // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] + case '\u0202': // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] + case '\u0226': // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] + case '\u023A': // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] + case '\u1D00': // ᴀ [LATIN LETTER SMALL CAPITAL A] + case '\u1E00': // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] + case '\u1EA0': // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] + case '\u1EA2': // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] + case '\u1EA4': // Ấ 
[LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] + case '\u1EA6': // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA8': // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAA': // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAC': // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAE': // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] + case '\u1EB0': // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] + case '\u1EB2': // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB4': // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] + case '\u1EB6': // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] + case '\u24B6': // Ⓐ [CIRCLED LATIN CAPITAL LETTER A] + case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A] + output[outputPos++] = 'A'; + break; + case '\u00E0': // à [LATIN SMALL LETTER A WITH GRAVE] + case '\u00E1': // á [LATIN SMALL LETTER A WITH ACUTE] + case '\u00E2': // â [LATIN SMALL LETTER A WITH CIRCUMFLEX] + case '\u00E3': // ã [LATIN SMALL LETTER A WITH TILDE] + case '\u00E4': // ä [LATIN SMALL LETTER A WITH DIAERESIS] + case '\u00E5': // å [LATIN SMALL LETTER A WITH RING ABOVE] + case '\u0101': // ā [LATIN SMALL LETTER A WITH MACRON] + case '\u0103': // ă [LATIN SMALL LETTER A WITH BREVE] + case '\u0105': // ą [LATIN SMALL LETTER A WITH OGONEK] + case '\u01CE': // ǎ [LATIN SMALL LETTER A WITH CARON] + case '\u01DF': // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] + case '\u01E1': // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] + case '\u01FB': // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] + case '\u0201': // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] + case '\u0203': // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] + case '\u0227': // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] + case '\u0250': // ɐ [LATIN SMALL LETTER TURNED A] + case '\u0259': // ə [LATIN SMALL LETTER SCHWA] + case '\u025A': // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] + case 
'\u1D8F': // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] + case '\u1D95': // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] + case '\u1E01': // ạ [LATIN SMALL LETTER A WITH RING BELOW] + case '\u1E9A': // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] + case '\u1EA1': // ạ [LATIN SMALL LETTER A WITH DOT BELOW] + case '\u1EA3': // ả [LATIN SMALL LETTER A WITH HOOK ABOVE] + case '\u1EA5': // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] + case '\u1EA7': // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] + case '\u1EA9': // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EAB': // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] + case '\u1EAD': // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EAF': // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] + case '\u1EB1': // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] + case '\u1EB3': // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] + case '\u1EB5': // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] + case '\u1EB7': // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] + case '\u2090': // ₐ [LATIN SUBSCRIPT SMALL LETTER A] + case '\u2094': // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] + case '\u24D0': // ⓐ [CIRCLED LATIN SMALL LETTER A] + case '\u2C65': // ⱥ [LATIN SMALL LETTER A WITH STROKE] + case '\u2C6F': // Ɐ [LATIN CAPITAL LETTER TURNED A] + case '\uFF41': // a [FULLWIDTH LATIN SMALL LETTER A] + output[outputPos++] = 'a'; + break; + case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA] + output[outputPos++] = 'A'; + output[outputPos++] = 'A'; + break; + case '\u00C6': // Æ [LATIN CAPITAL LETTER AE] + case '\u01E2': // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] + case '\u01FC': // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] + case '\u1D01': // ᴁ [LATIN LETTER SMALL CAPITAL AE] + output[outputPos++] = 'A'; + output[outputPos++] = 'E'; + break; + case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO] + output[outputPos++] = 'A'; + output[outputPos++] = 'O'; + break; + case '\uA736': // Ꜷ [LATIN 
CAPITAL LETTER AU] + output[outputPos++] = 'A'; + output[outputPos++] = 'U'; + break; + case '\uA738': // Ꜹ [LATIN CAPITAL LETTER AV] + case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = 'A'; + output[outputPos++] = 'V'; + break; + case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY] + output[outputPos++] = 'A'; + output[outputPos++] = 'Y'; + break; + case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A] + output[outputPos++] = '('; + output[outputPos++] = 'a'; + output[outputPos++] = ')'; + break; + case '\uA733': // ꜳ [LATIN SMALL LETTER AA] + output[outputPos++] = 'a'; + output[outputPos++] = 'a'; + break; + case '\u00E6': // æ [LATIN SMALL LETTER AE] + case '\u01E3': // ǣ [LATIN SMALL LETTER AE WITH MACRON] + case '\u01FD': // ǽ [LATIN SMALL LETTER AE WITH ACUTE] + case '\u1D02': // ᴂ [LATIN SMALL LETTER TURNED AE] + output[outputPos++] = 'a'; + output[outputPos++] = 'e'; + break; + case '\uA735': // ꜵ [LATIN SMALL LETTER AO] + output[outputPos++] = 'a'; + output[outputPos++] = 'o'; + break; + case '\uA737': // ꜷ [LATIN SMALL LETTER AU] + output[outputPos++] = 'a'; + output[outputPos++] = 'u'; + break; + case '\uA739': // ꜹ [LATIN SMALL LETTER AV] + case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = 'a'; + output[outputPos++] = 'v'; + break; + case '\uA73D': // ꜽ [LATIN SMALL LETTER AY] + output[outputPos++] = 'a'; + output[outputPos++] = 'y'; + break; + case '\u0181': // Ɓ [LATIN CAPITAL LETTER B WITH HOOK] + case '\u0182': // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] + case '\u0243': // Ƀ [LATIN CAPITAL LETTER B WITH STROKE] + case '\u0299': // ʙ [LATIN LETTER SMALL CAPITAL B] + case '\u1D03': // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] + case '\u1E02': // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] + case '\u1E04': // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] + case '\u1E06': // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] + case '\u24B7': // Ⓑ [CIRCLED LATIN CAPITAL LETTER B] + case '\uFF22': 
// B [FULLWIDTH LATIN CAPITAL LETTER B] + output[outputPos++] = 'B'; + break; + case '\u0180': // ƀ [LATIN SMALL LETTER B WITH STROKE] + case '\u0183': // ƃ [LATIN SMALL LETTER B WITH TOPBAR] + case '\u0253': // ɓ [LATIN SMALL LETTER B WITH HOOK] + case '\u1D6C': // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] + case '\u1D80': // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] + case '\u1E03': // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] + case '\u1E05': // ḅ [LATIN SMALL LETTER B WITH DOT BELOW] + case '\u1E07': // ḇ [LATIN SMALL LETTER B WITH LINE BELOW] + case '\u24D1': // ⓑ [CIRCLED LATIN SMALL LETTER B] + case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B] + output[outputPos++] = 'b'; + break; + case '\u249D': // ⒝ [PARENTHESIZED LATIN SMALL LETTER B] + output[outputPos++] = '('; + output[outputPos++] = 'b'; + output[outputPos++] = ')'; + break; + case '\u00C7': // Ç [LATIN CAPITAL LETTER C WITH CEDILLA] + case '\u0106': // Ć [LATIN CAPITAL LETTER C WITH ACUTE] + case '\u0108': // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] + case '\u010A': // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] + case '\u010C': // Č [LATIN CAPITAL LETTER C WITH CARON] + case '\u0187': // Ƈ [LATIN CAPITAL LETTER C WITH HOOK] + case '\u023B': // Ȼ [LATIN CAPITAL LETTER C WITH STROKE] + case '\u0297': // ʗ [LATIN LETTER STRETCHED C] + case '\u1D04': // ᴄ [LATIN LETTER SMALL CAPITAL C] + case '\u1E08': // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] + case '\u24B8': // Ⓒ [CIRCLED LATIN CAPITAL LETTER C] + case '\uFF23': // C [FULLWIDTH LATIN CAPITAL LETTER C] + output[outputPos++] = 'C'; + break; + case '\u00E7': // ç [LATIN SMALL LETTER C WITH CEDILLA] + case '\u0107': // ć [LATIN SMALL LETTER C WITH ACUTE] + case '\u0109': // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] + case '\u010B': // ċ [LATIN SMALL LETTER C WITH DOT ABOVE] + case '\u010D': // č [LATIN SMALL LETTER C WITH CARON] + case '\u0188': // ƈ [LATIN SMALL LETTER C WITH HOOK] + case '\u023C': // ȼ [LATIN SMALL LETTER C WITH STROKE] + 
case '\u0255': // ɕ [LATIN SMALL LETTER C WITH CURL] + case '\u1E09': // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] + case '\u2184': // ↄ [LATIN SMALL LETTER REVERSED C] + case '\u24D2': // ⓒ [CIRCLED LATIN SMALL LETTER C] + case '\uA73E': // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] + case '\uA73F': // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] + case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C] + output[outputPos++] = 'c'; + break; + case '\u249E': // ⒞ [PARENTHESIZED LATIN SMALL LETTER C] + output[outputPos++] = '('; + output[outputPos++] = 'c'; + output[outputPos++] = ')'; + break; + case '\u00D0': // Ð [LATIN CAPITAL LETTER ETH] + case '\u010E': // Ď [LATIN CAPITAL LETTER D WITH CARON] + case '\u0110': // Đ [LATIN CAPITAL LETTER D WITH STROKE] + case '\u0189': // Ɖ [LATIN CAPITAL LETTER AFRICAN D] + case '\u018A': // Ɗ [LATIN CAPITAL LETTER D WITH HOOK] + case '\u018B': // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] + case '\u1D05': // ᴅ [LATIN LETTER SMALL CAPITAL D] + case '\u1D06': // ᴆ [LATIN LETTER SMALL CAPITAL ETH] + case '\u1E0A': // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] + case '\u1E0C': // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] + case '\u1E0E': // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] + case '\u1E10': // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] + case '\u1E12': // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24B9': // Ⓓ [CIRCLED LATIN CAPITAL LETTER D] + case '\uA779': // Ꝺ [LATIN CAPITAL LETTER INSULAR D] + case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D] + output[outputPos++] = 'D'; + break; + case '\u00F0': // ð [LATIN SMALL LETTER ETH] + case '\u010F': // ď [LATIN SMALL LETTER D WITH CARON] + case '\u0111': // đ [LATIN SMALL LETTER D WITH STROKE] + case '\u018C': // ƌ [LATIN SMALL LETTER D WITH TOPBAR] + case '\u0221': // ȡ [LATIN SMALL LETTER D WITH CURL] + case '\u0256': // ɖ [LATIN SMALL LETTER D WITH TAIL] + case '\u0257': // ɗ [LATIN SMALL LETTER D WITH HOOK] + case '\u1D6D': // ᵭ [LATIN SMALL LETTER D 
WITH MIDDLE TILDE] + case '\u1D81': // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] + case '\u1D91': // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] + case '\u1E0B': // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] + case '\u1E0D': // ḍ [LATIN SMALL LETTER D WITH DOT BELOW] + case '\u1E0F': // ḏ [LATIN SMALL LETTER D WITH LINE BELOW] + case '\u1E11': // ḑ [LATIN SMALL LETTER D WITH CEDILLA] + case '\u1E13': // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] + case '\u24D3': // ⓓ [CIRCLED LATIN SMALL LETTER D] + case '\uA77A': // ꝺ [LATIN SMALL LETTER INSULAR D] + case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D] + output[outputPos++] = 'd'; + break; + case '\u01C4': // DŽ [LATIN CAPITAL LETTER DZ WITH CARON] + case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ] + output[outputPos++] = 'D'; + output[outputPos++] = 'Z'; + break; + case '\u01C5': // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] + case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] + output[outputPos++] = 'D'; + output[outputPos++] = 'z'; + break; + case '\u249F': // ⒟ [PARENTHESIZED LATIN SMALL LETTER D] + output[outputPos++] = '('; + output[outputPos++] = 'd'; + output[outputPos++] = ')'; + break; + case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH] + output[outputPos++] = 'd'; + output[outputPos++] = 'b'; + break; + case '\u01C6': // dž [LATIN SMALL LETTER DZ WITH CARON] + case '\u01F3': // dz [LATIN SMALL LETTER DZ] + case '\u02A3': // ʣ [LATIN SMALL LETTER DZ DIGRAPH] + case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] + output[outputPos++] = 'd'; + output[outputPos++] = 'z'; + break; + case '\u00C8': // È [LATIN CAPITAL LETTER E WITH GRAVE] + case '\u00C9': // É [LATIN CAPITAL LETTER E WITH ACUTE] + case '\u00CA': // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] + case '\u00CB': // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] + case '\u0112': // Ē [LATIN CAPITAL LETTER E WITH MACRON] + case '\u0114': // Ĕ [LATIN CAPITAL LETTER E WITH BREVE] + case '\u0116': // Ė [LATIN 
CAPITAL LETTER E WITH DOT ABOVE] + case '\u0118': // Ę [LATIN CAPITAL LETTER E WITH OGONEK] + case '\u011A': // Ě [LATIN CAPITAL LETTER E WITH CARON] + case '\u018E': // Ǝ [LATIN CAPITAL LETTER REVERSED E] + case '\u0190': // Ɛ [LATIN CAPITAL LETTER OPEN E] + case '\u0204': // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] + case '\u0206': // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] + case '\u0228': // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] + case '\u0246': // Ɇ [LATIN CAPITAL LETTER E WITH STROKE] + case '\u1D07': // ᴇ [LATIN LETTER SMALL CAPITAL E] + case '\u1E14': // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] + case '\u1E16': // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] + case '\u1E18': // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1A': // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] + case '\u1E1C': // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB8': // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] + case '\u1EBA': // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] + case '\u1EBC': // Ẽ [LATIN CAPITAL LETTER E WITH TILDE] + case '\u1EBE': // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC0': // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC2': // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC4': // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC6': // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case '\u24BA': // Ⓔ [CIRCLED LATIN CAPITAL LETTER E] + case '\u2C7B': // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] + case '\uFF25': // E [FULLWIDTH LATIN CAPITAL LETTER E] + output[outputPos++] = 'E'; + break; + case '\u00E8': // è [LATIN SMALL LETTER E WITH GRAVE] + case '\u00E9': // é [LATIN SMALL LETTER E WITH ACUTE] + case '\u00EA': // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] + case '\u00EB': // ë [LATIN SMALL LETTER E WITH DIAERESIS] + case '\u0113': // ē [LATIN SMALL LETTER E WITH MACRON] + case '\u0115': // ĕ [LATIN SMALL 
LETTER E WITH BREVE] + case '\u0117': // ė [LATIN SMALL LETTER E WITH DOT ABOVE] + case '\u0119': // ę [LATIN SMALL LETTER E WITH OGONEK] + case '\u011B': // ě [LATIN SMALL LETTER E WITH CARON] + case '\u01DD': // ǝ [LATIN SMALL LETTER TURNED E] + case '\u0205': // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] + case '\u0207': // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] + case '\u0229': // ȩ [LATIN SMALL LETTER E WITH CEDILLA] + case '\u0247': // ɇ [LATIN SMALL LETTER E WITH STROKE] + case '\u0258': // ɘ [LATIN SMALL LETTER REVERSED E] + case '\u025B': // ɛ [LATIN SMALL LETTER OPEN E] + case '\u025C': // ɜ [LATIN SMALL LETTER REVERSED OPEN E] + case '\u025D': // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] + case '\u025E': // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] + case '\u029A': // ʚ [LATIN SMALL LETTER CLOSED OPEN E] + case '\u1D08': // ᴈ [LATIN SMALL LETTER TURNED OPEN E] + case '\u1D92': // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] + case '\u1D93': // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] + case '\u1D94': // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] + case '\u1E15': // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] + case '\u1E17': // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] + case '\u1E19': // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] + case '\u1E1B': // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] + case '\u1E1D': // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] + case '\u1EB9': // ẹ [LATIN SMALL LETTER E WITH DOT BELOW] + case '\u1EBB': // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] + case '\u1EBD': // ẽ [LATIN SMALL LETTER E WITH TILDE] + case '\u1EBF': // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] + case '\u1EC1': // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] + case '\u1EC3': // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1EC5': // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] + case '\u1EC7': // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case 
'\u2091': // ₑ [LATIN SUBSCRIPT SMALL LETTER E] + case '\u24D4': // ⓔ [CIRCLED LATIN SMALL LETTER E] + case '\u2C78': // ⱸ [LATIN SMALL LETTER E WITH NOTCH] + case '\uFF45': // e [FULLWIDTH LATIN SMALL LETTER E] + output[outputPos++] = 'e'; + break; + case '\u24A0': // ⒠ [PARENTHESIZED LATIN SMALL LETTER E] + output[outputPos++] = '('; + output[outputPos++] = 'e'; + output[outputPos++] = ')'; + break; + case '\u0191': // Ƒ [LATIN CAPITAL LETTER F WITH HOOK] + case '\u1E1E': // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] + case '\u24BB': // Ⓕ [CIRCLED LATIN CAPITAL LETTER F] + case '\uA730': // ꜰ [LATIN LETTER SMALL CAPITAL F] + case '\uA77B': // Ꝼ [LATIN CAPITAL LETTER INSULAR F] + case '\uA7FB': // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] + case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F] + output[outputPos++] = 'F'; + break; + case '\u0192': // ƒ [LATIN SMALL LETTER F WITH HOOK] + case '\u1D6E': // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] + case '\u1D82': // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] + case '\u1E1F': // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] + case '\u1E9B': // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] + case '\u24D5': // ⓕ [CIRCLED LATIN SMALL LETTER F] + case '\uA77C': // ꝼ [LATIN SMALL LETTER INSULAR F] + case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F] + output[outputPos++] = 'f'; + break; + case '\u24A1': // ⒡ [PARENTHESIZED LATIN SMALL LETTER F] + output[outputPos++] = '('; + output[outputPos++] = 'f'; + output[outputPos++] = ')'; + break; + case '\uFB00': // ff [LATIN SMALL LIGATURE FF] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + break; + case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + output[outputPos++] = 'i'; + break; + case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL] + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + output[outputPos++] = 'l'; + break; + case '\uFB01': // fi [LATIN SMALL LIGATURE FI] + output[outputPos++] = 'f'; + 
output[outputPos++] = 'i'; + break; + case '\uFB02': // fl [LATIN SMALL LIGATURE FL] + output[outputPos++] = 'f'; + output[outputPos++] = 'l'; + break; + case '\u011C': // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] + case '\u011E': // Ğ [LATIN CAPITAL LETTER G WITH BREVE] + case '\u0120': // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] + case '\u0122': // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] + case '\u0193': // Ɠ [LATIN CAPITAL LETTER G WITH HOOK] + case '\u01E4': // Ǥ [LATIN CAPITAL LETTER G WITH STROKE] + case '\u01E5': // ǥ [LATIN SMALL LETTER G WITH STROKE] + case '\u01E6': // Ǧ [LATIN CAPITAL LETTER G WITH CARON] + case '\u01E7': // ǧ [LATIN SMALL LETTER G WITH CARON] + case '\u01F4': // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] + case '\u0262': // ɢ [LATIN LETTER SMALL CAPITAL G] + case '\u029B': // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] + case '\u1E20': // Ḡ [LATIN CAPITAL LETTER G WITH MACRON] + case '\u24BC': // Ⓖ [CIRCLED LATIN CAPITAL LETTER G] + case '\uA77D': // Ᵹ [LATIN CAPITAL LETTER INSULAR G] + case '\uA77E': // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] + case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G] + output[outputPos++] = 'G'; + break; + case '\u011D': // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] + case '\u011F': // ğ [LATIN SMALL LETTER G WITH BREVE] + case '\u0121': // ġ [LATIN SMALL LETTER G WITH DOT ABOVE] + case '\u0123': // ģ [LATIN SMALL LETTER G WITH CEDILLA] + case '\u01F5': // ǵ [LATIN SMALL LETTER G WITH ACUTE] + case '\u0260': // ɠ [LATIN SMALL LETTER G WITH HOOK] + case '\u0261': // ɡ [LATIN SMALL LETTER SCRIPT G] + case '\u1D77': // ᵷ [LATIN SMALL LETTER TURNED G] + case '\u1D79': // ᵹ [LATIN SMALL LETTER INSULAR G] + case '\u1D83': // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] + case '\u1E21': // ḡ [LATIN SMALL LETTER G WITH MACRON] + case '\u24D6': // ⓖ [CIRCLED LATIN SMALL LETTER G] + case '\uA77F': // ꝿ [LATIN SMALL LETTER TURNED INSULAR G] + case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G] + output[outputPos++] = 
'g'; + break; + case '\u24A2': // ⒢ [PARENTHESIZED LATIN SMALL LETTER G] + output[outputPos++] = '('; + output[outputPos++] = 'g'; + output[outputPos++] = ')'; + break; + case '\u0124': // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] + case '\u0126': // Ħ [LATIN CAPITAL LETTER H WITH STROKE] + case '\u021E': // Ȟ [LATIN CAPITAL LETTER H WITH CARON] + case '\u029C': // ʜ [LATIN LETTER SMALL CAPITAL H] + case '\u1E22': // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] + case '\u1E24': // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] + case '\u1E26': // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] + case '\u1E28': // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] + case '\u1E2A': // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] + case '\u24BD': // Ⓗ [CIRCLED LATIN CAPITAL LETTER H] + case '\u2C67': // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] + case '\u2C75': // Ⱶ [LATIN CAPITAL LETTER HALF H] + case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H] + output[outputPos++] = 'H'; + break; + case '\u0125': // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] + case '\u0127': // ħ [LATIN SMALL LETTER H WITH STROKE] + case '\u021F': // ȟ [LATIN SMALL LETTER H WITH CARON] + case '\u0265': // ɥ [LATIN SMALL LETTER TURNED H] + case '\u0266': // ɦ [LATIN SMALL LETTER H WITH HOOK] + case '\u02AE': // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] + case '\u02AF': // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] + case '\u1E23': // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] + case '\u1E25': // ḥ [LATIN SMALL LETTER H WITH DOT BELOW] + case '\u1E27': // ḧ [LATIN SMALL LETTER H WITH DIAERESIS] + case '\u1E29': // ḩ [LATIN SMALL LETTER H WITH CEDILLA] + case '\u1E2B': // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] + case '\u1E96': // ẖ [LATIN SMALL LETTER H WITH LINE BELOW] + case '\u24D7': // ⓗ [CIRCLED LATIN SMALL LETTER H] + case '\u2C68': // ⱨ [LATIN SMALL LETTER H WITH DESCENDER] + case '\u2C76': // ⱶ [LATIN SMALL LETTER HALF H] + case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H] + output[outputPos++] 
= 'h'; + break; + case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] + output[outputPos++] = 'H'; + output[outputPos++] = 'V'; + break; + case '\u24A3': // ⒣ [PARENTHESIZED LATIN SMALL LETTER H] + output[outputPos++] = '('; + output[outputPos++] = 'h'; + output[outputPos++] = ')'; + break; + case '\u0195': // ƕ [LATIN SMALL LETTER HV] + output[outputPos++] = 'h'; + output[outputPos++] = 'v'; + break; + case '\u00CC': // Ì [LATIN CAPITAL LETTER I WITH GRAVE] + case '\u00CD': // Í [LATIN CAPITAL LETTER I WITH ACUTE] + case '\u00CE': // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] + case '\u00CF': // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] + case '\u0128': // Ĩ [LATIN CAPITAL LETTER I WITH TILDE] + case '\u012A': // Ī [LATIN CAPITAL LETTER I WITH MACRON] + case '\u012C': // Ĭ [LATIN CAPITAL LETTER I WITH BREVE] + case '\u012E': // Į [LATIN CAPITAL LETTER I WITH OGONEK] + case '\u0130': // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] + case '\u0196': // Ɩ [LATIN CAPITAL LETTER IOTA] + case '\u0197': // Ɨ [LATIN CAPITAL LETTER I WITH STROKE] + case '\u01CF': // Ǐ [LATIN CAPITAL LETTER I WITH CARON] + case '\u0208': // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] + case '\u020A': // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] + case '\u026A': // ɪ [LATIN LETTER SMALL CAPITAL I] + case '\u1D7B': // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] + case '\u1E2C': // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] + case '\u1E2E': // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC8': // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] + case '\u1ECA': // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] + case '\u24BE': // Ⓘ [CIRCLED LATIN CAPITAL LETTER I] + case '\uA7FE': // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] + case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I] + output[outputPos++] = 'I'; + break; + case '\u00EC': // ì [LATIN SMALL LETTER I WITH GRAVE] + case '\u00ED': // í [LATIN SMALL LETTER I WITH ACUTE] + case '\u00EE': // î [LATIN 
SMALL LETTER I WITH CIRCUMFLEX] + case '\u00EF': // ï [LATIN SMALL LETTER I WITH DIAERESIS] + case '\u0129': // ĩ [LATIN SMALL LETTER I WITH TILDE] + case '\u012B': // ī [LATIN SMALL LETTER I WITH MACRON] + case '\u012D': // ĭ [LATIN SMALL LETTER I WITH BREVE] + case '\u012F': // į [LATIN SMALL LETTER I WITH OGONEK] + case '\u0131': // ı [LATIN SMALL LETTER DOTLESS I] + case '\u01D0': // ǐ [LATIN SMALL LETTER I WITH CARON] + case '\u0209': // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] + case '\u020B': // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] + case '\u0268': // ɨ [LATIN SMALL LETTER I WITH STROKE] + case '\u1D09': // ᴉ [LATIN SMALL LETTER TURNED I] + case '\u1D62': // ᵢ [LATIN SUBSCRIPT SMALL LETTER I] + case '\u1D7C': // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] + case '\u1D96': // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] + case '\u1E2D': // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] + case '\u1E2F': // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] + case '\u1EC9': // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] + case '\u1ECB': // ị [LATIN SMALL LETTER I WITH DOT BELOW] + case '\u2071': // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] + case '\u24D8': // ⓘ [CIRCLED LATIN SMALL LETTER I] + case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I] + output[outputPos++] = 'i'; + break; + case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ] + output[outputPos++] = 'I'; + output[outputPos++] = 'J'; + break; + case '\u24A4': // ⒤ [PARENTHESIZED LATIN SMALL LETTER I] + output[outputPos++] = '('; + output[outputPos++] = 'i'; + output[outputPos++] = ')'; + break; + case '\u0133': // ij [LATIN SMALL LIGATURE IJ] + output[outputPos++] = 'i'; + output[outputPos++] = 'j'; + break; + case '\u0134': // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] + case '\u0248': // Ɉ [LATIN CAPITAL LETTER J WITH STROKE] + case '\u1D0A': // ᴊ [LATIN LETTER SMALL CAPITAL J] + case '\u24BF': // Ⓙ [CIRCLED LATIN CAPITAL LETTER J] + case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J] + output[outputPos++] = 
'J'; + break; + case '\u0135': // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] + case '\u01F0': // ǰ [LATIN SMALL LETTER J WITH CARON] + case '\u0237': // ȷ [LATIN SMALL LETTER DOTLESS J] + case '\u0249': // ɉ [LATIN SMALL LETTER J WITH STROKE] + case '\u025F': // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] + case '\u0284': // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] + case '\u029D': // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] + case '\u24D9': // ⓙ [CIRCLED LATIN SMALL LETTER J] + case '\u2C7C': // ⱼ [LATIN SUBSCRIPT SMALL LETTER J] + case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J] + output[outputPos++] = 'j'; + break; + case '\u24A5': // ⒥ [PARENTHESIZED LATIN SMALL LETTER J] + output[outputPos++] = '('; + output[outputPos++] = 'j'; + output[outputPos++] = ')'; + break; + case '\u0136': // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] + case '\u0198': // Ƙ [LATIN CAPITAL LETTER K WITH HOOK] + case '\u01E8': // Ǩ [LATIN CAPITAL LETTER K WITH CARON] + case '\u1D0B': // ᴋ [LATIN LETTER SMALL CAPITAL K] + case '\u1E30': // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] + case '\u1E32': // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] + case '\u1E34': // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] + case '\u24C0': // Ⓚ [CIRCLED LATIN CAPITAL LETTER K] + case '\u2C69': // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] + case '\uA740': // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] + case '\uA742': // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] + case '\uA744': // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K] + output[outputPos++] = 'K'; + break; + case '\u0137': // ķ [LATIN SMALL LETTER K WITH CEDILLA] + case '\u0199': // ƙ [LATIN SMALL LETTER K WITH HOOK] + case '\u01E9': // ǩ [LATIN SMALL LETTER K WITH CARON] + case '\u029E': // ʞ [LATIN SMALL LETTER TURNED K] + case '\u1D84': // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] + case '\u1E31': // ḱ [LATIN SMALL LETTER K WITH ACUTE] + case '\u1E33': // ḳ [LATIN 
SMALL LETTER K WITH DOT BELOW] + case '\u1E35': // ḵ [LATIN SMALL LETTER K WITH LINE BELOW] + case '\u24DA': // ⓚ [CIRCLED LATIN SMALL LETTER K] + case '\u2C6A': // ⱪ [LATIN SMALL LETTER K WITH DESCENDER] + case '\uA741': // ꝁ [LATIN SMALL LETTER K WITH STROKE] + case '\uA743': // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] + case '\uA745': // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] + case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K] + output[outputPos++] = 'k'; + break; + case '\u24A6': // ⒦ [PARENTHESIZED LATIN SMALL LETTER K] + output[outputPos++] = '('; + output[outputPos++] = 'k'; + output[outputPos++] = ')'; + break; + case '\u0139': // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] + case '\u013B': // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] + case '\u013D': // Ľ [LATIN CAPITAL LETTER L WITH CARON] + case '\u013F': // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] + case '\u0141': // Ł [LATIN CAPITAL LETTER L WITH STROKE] + case '\u023D': // Ƚ [LATIN CAPITAL LETTER L WITH BAR] + case '\u029F': // ʟ [LATIN LETTER SMALL CAPITAL L] + case '\u1D0C': // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] + case '\u1E36': // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] + case '\u1E38': // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3A': // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] + case '\u1E3C': // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24C1': // Ⓛ [CIRCLED LATIN CAPITAL LETTER L] + case '\u2C60': // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] + case '\u2C62': // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] + case '\uA746': // Ꝇ [LATIN CAPITAL LETTER BROKEN L] + case '\uA748': // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] + case '\uA780': // Ꞁ [LATIN CAPITAL LETTER TURNED L] + case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L] + output[outputPos++] = 'L'; + break; + case '\u013A': // ĺ [LATIN SMALL LETTER L WITH ACUTE] + case '\u013C': // ļ [LATIN SMALL LETTER L WITH CEDILLA] + case '\u013E': // ľ [LATIN SMALL LETTER L 
WITH CARON] + case '\u0140': // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] + case '\u0142': // ł [LATIN SMALL LETTER L WITH STROKE] + case '\u019A': // ƚ [LATIN SMALL LETTER L WITH BAR] + case '\u0234': // ȴ [LATIN SMALL LETTER L WITH CURL] + case '\u026B': // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] + case '\u026C': // ɬ [LATIN SMALL LETTER L WITH BELT] + case '\u026D': // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] + case '\u1D85': // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] + case '\u1E37': // ḷ [LATIN SMALL LETTER L WITH DOT BELOW] + case '\u1E39': // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] + case '\u1E3B': // ḻ [LATIN SMALL LETTER L WITH LINE BELOW] + case '\u1E3D': // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] + case '\u24DB': // ⓛ [CIRCLED LATIN SMALL LETTER L] + case '\u2C61': // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] + case '\uA747': // ꝇ [LATIN SMALL LETTER BROKEN L] + case '\uA749': // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] + case '\uA781': // ꞁ [LATIN SMALL LETTER TURNED L] + case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L] + output[outputPos++] = 'l'; + break; + case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ] + output[outputPos++] = 'L'; + output[outputPos++] = 'J'; + break; + case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] + output[outputPos++] = 'L'; + output[outputPos++] = 'L'; + break; + case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] + output[outputPos++] = 'L'; + output[outputPos++] = 'j'; + break; + case '\u24A7': // ⒧ [PARENTHESIZED LATIN SMALL LETTER L] + output[outputPos++] = '('; + output[outputPos++] = 'l'; + output[outputPos++] = ')'; + break; + case '\u01C9': // lj [LATIN SMALL LETTER LJ] + output[outputPos++] = 'l'; + output[outputPos++] = 'j'; + break; + case '\u1EFB': // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] + output[outputPos++] = 'l'; + output[outputPos++] = 'l'; + break; + case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH] + output[outputPos++] = 'l'; + output[outputPos++] = 
's'; + break; + case '\u02AB': // ʫ [LATIN SMALL LETTER LZ DIGRAPH] + output[outputPos++] = 'l'; + output[outputPos++] = 'z'; + break; + case '\u019C': // Ɯ [LATIN CAPITAL LETTER TURNED M] + case '\u1D0D': // ᴍ [LATIN LETTER SMALL CAPITAL M] + case '\u1E3E': // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] + case '\u1E40': // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] + case '\u1E42': // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] + case '\u24C2': // Ⓜ [CIRCLED LATIN CAPITAL LETTER M] + case '\u2C6E': // Ɱ [LATIN CAPITAL LETTER M WITH HOOK] + case '\uA7FD': // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] + case '\uA7FF': // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] + case '\uFF2D': // M [FULLWIDTH LATIN CAPITAL LETTER M] + output[outputPos++] = 'M'; + break; + case '\u026F': // ɯ [LATIN SMALL LETTER TURNED M] + case '\u0270': // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] + case '\u0271': // ɱ [LATIN SMALL LETTER M WITH HOOK] + case '\u1D6F': // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] + case '\u1D86': // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] + case '\u1E3F': // ḿ [LATIN SMALL LETTER M WITH ACUTE] + case '\u1E41': // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] + case '\u1E43': // ṃ [LATIN SMALL LETTER M WITH DOT BELOW] + case '\u24DC': // ⓜ [CIRCLED LATIN SMALL LETTER M] + case '\uFF4D': // m [FULLWIDTH LATIN SMALL LETTER M] + output[outputPos++] = 'm'; + break; + case '\u24A8': // ⒨ [PARENTHESIZED LATIN SMALL LETTER M] + output[outputPos++] = '('; + output[outputPos++] = 'm'; + output[outputPos++] = ')'; + break; + case '\u00D1': // Ñ [LATIN CAPITAL LETTER N WITH TILDE] + case '\u0143': // Ń [LATIN CAPITAL LETTER N WITH ACUTE] + case '\u0145': // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] + case '\u0147': // Ň [LATIN CAPITAL LETTER N WITH CARON] + case '\u014A': // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] + case '\u019D': // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] + case '\u01F8': // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] + case '\u0220': // Ƞ [LATIN 
CAPITAL LETTER N WITH LONG RIGHT LEG] + case '\u0274': // ɴ [LATIN LETTER SMALL CAPITAL N] + case '\u1D0E': // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] + case '\u1E44': // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] + case '\u1E46': // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] + case '\u1E48': // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] + case '\u1E4A': // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] + case '\u24C3': // Ⓝ [CIRCLED LATIN CAPITAL LETTER N] + case '\uFF2E': // N [FULLWIDTH LATIN CAPITAL LETTER N] + output[outputPos++] = 'N'; + break; + case '\u00F1': // ñ [LATIN SMALL LETTER N WITH TILDE] + case '\u0144': // ń [LATIN SMALL LETTER N WITH ACUTE] + case '\u0146': // ņ [LATIN SMALL LETTER N WITH CEDILLA] + case '\u0148': // ň [LATIN SMALL LETTER N WITH CARON] + case '\u0149': // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] + case '\u014B': // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] + case '\u019E': // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] + case '\u01F9': // ǹ [LATIN SMALL LETTER N WITH GRAVE] + case '\u0235': // ȵ [LATIN SMALL LETTER N WITH CURL] + case '\u0272': // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] + case '\u0273': // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] + case '\u1D70': // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] + case '\u1D87': // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] + case '\u1E45': // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] + case '\u1E47': // ṇ [LATIN SMALL LETTER N WITH DOT BELOW] + case '\u1E49': // ṉ [LATIN SMALL LETTER N WITH LINE BELOW] + case '\u1E4B': // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] + case '\u207F': // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] + case '\u24DD': // ⓝ [CIRCLED LATIN SMALL LETTER N] + case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N] + output[outputPos++] = 'n'; + break; + case '\u01CA': // NJ [LATIN CAPITAL LETTER NJ] + output[outputPos++] = 'N'; + output[outputPos++] = 'J'; + break; + case '\u01CB': // Nj [LATIN CAPITAL LETTER N WITH SMALL 
LETTER J] + output[outputPos++] = 'N'; + output[outputPos++] = 'j'; + break; + case '\u24A9': // ⒩ [PARENTHESIZED LATIN SMALL LETTER N] + output[outputPos++] = '('; + output[outputPos++] = 'n'; + output[outputPos++] = ')'; + break; + case '\u01CC': // nj [LATIN SMALL LETTER NJ] + output[outputPos++] = 'n'; + output[outputPos++] = 'j'; + break; + case '\u00D2': // Ò [LATIN CAPITAL LETTER O WITH GRAVE] + case '\u00D3': // Ó [LATIN CAPITAL LETTER O WITH ACUTE] + case '\u00D4': // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] + case '\u00D5': // Õ [LATIN CAPITAL LETTER O WITH TILDE] + case '\u00D6': // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] + case '\u00D8': // Ø [LATIN CAPITAL LETTER O WITH STROKE] + case '\u014C': // Ō [LATIN CAPITAL LETTER O WITH MACRON] + case '\u014E': // Ŏ [LATIN CAPITAL LETTER O WITH BREVE] + case '\u0150': // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] + case '\u0186': // Ɔ [LATIN CAPITAL LETTER OPEN O] + case '\u019F': // Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] + case '\u01A0': // Ơ [LATIN CAPITAL LETTER O WITH HORN] + case '\u01D1': // Ǒ [LATIN CAPITAL LETTER O WITH CARON] + case '\u01EA': // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] + case '\u01EC': // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] + case '\u01FE': // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] + case '\u020C': // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] + case '\u020E': // Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] + case '\u022A': // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] + case '\u022C': // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] + case '\u022E': // Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] + case '\u0230': // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] + case '\u1D0F': // ᴏ [LATIN LETTER SMALL CAPITAL O] + case '\u1D10': // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] + case '\u1E4C': // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] + case '\u1E4E': // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] + case '\u1E50': // Ṑ 
[LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] + case '\u1E52': // Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] + case '\u1ECC': // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] + case '\u1ECE': // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] + case '\u1ED0': // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] + case '\u1ED2': // Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] + case '\u1ED4': // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1ED6': // Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] + case '\u1ED8': // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EDA': // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] + case '\u1EDC': // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] + case '\u1EDE': // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] + case '\u1EE0': // Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] + case '\u1EE2': // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] + case '\u24C4': // Ⓞ [CIRCLED LATIN CAPITAL LETTER O] + case '\uA74A': // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] + case '\uA74C': // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] + case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O] + output[outputPos++] = 'O'; + break; + case '\u00F2': // ò [LATIN SMALL LETTER O WITH GRAVE] + case '\u00F3': // ó [LATIN SMALL LETTER O WITH ACUTE] + case '\u00F4': // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] + case '\u00F5': // õ [LATIN SMALL LETTER O WITH TILDE] + case '\u00F6': // ö [LATIN SMALL LETTER O WITH DIAERESIS] + case '\u00F8': // ø [LATIN SMALL LETTER O WITH STROKE] + case '\u014D': // ō [LATIN SMALL LETTER O WITH MACRON] + case '\u014F': // ŏ [LATIN SMALL LETTER O WITH BREVE] + case '\u0151': // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] + case '\u01A1': // ơ [LATIN SMALL LETTER O WITH HORN] + case '\u01D2': // ǒ [LATIN SMALL LETTER O WITH CARON] + case '\u01EB': // ǫ [LATIN SMALL LETTER O WITH OGONEK] + case '\u01ED': // ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] + 
case '\u01FF': // ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] + case '\u020D': // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] + case '\u020F': // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE] + case '\u022B': // ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] + case '\u022D': // ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] + case '\u022F': // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] + case '\u0231': // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] + case '\u0254': // ɔ [LATIN SMALL LETTER OPEN O] + case '\u0275': // ɵ [LATIN SMALL LETTER BARRED O] + case '\u1D16': // ᴖ [LATIN SMALL LETTER TOP HALF O] + case '\u1D17': // ᴗ [LATIN SMALL LETTER BOTTOM HALF O] + case '\u1D97': // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] + case '\u1E4D': // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] + case '\u1E4F': // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] + case '\u1E51': // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] + case '\u1E53': // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] + case '\u1ECD': // ọ [LATIN SMALL LETTER O WITH DOT BELOW] + case '\u1ECF': // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] + case '\u1ED1': // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] + case '\u1ED3': // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] + case '\u1ED5': // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case '\u1ED7': // ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] + case '\u1ED9': // ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case '\u1EDB': // ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] + case '\u1EDD': // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] + case '\u1EDF': // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] + case '\u1EE1': // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] + case '\u1EE3': // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] + case '\u2092': // ₒ [LATIN SUBSCRIPT SMALL LETTER O] + case '\u24DE': // ⓞ [CIRCLED LATIN SMALL LETTER O] + case '\u2C7A': // ⱺ [LATIN SMALL LETTER O WITH LOW RING 
INSIDE] + case '\uA74B': // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] + case '\uA74D': // ꝍ [LATIN SMALL LETTER O WITH LOOP] + case '\uFF4F': // o [FULLWIDTH LATIN SMALL LETTER O] + output[outputPos++] = 'o'; + break; + case '\u0152': // Œ [LATIN CAPITAL LIGATURE OE] + case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE] + output[outputPos++] = 'O'; + output[outputPos++] = 'E'; + break; + case '\uA74E': // Ꝏ [LATIN CAPITAL LETTER OO] + output[outputPos++] = 'O'; + output[outputPos++] = 'O'; + break; + case '\u0222': // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] + case '\u1D15': // ᴕ [LATIN LETTER SMALL CAPITAL OU] + output[outputPos++] = 'O'; + output[outputPos++] = 'U'; + break; + case '\u24AA': // ⒪ [PARENTHESIZED LATIN SMALL LETTER O] + output[outputPos++] = '('; + output[outputPos++] = 'o'; + output[outputPos++] = ')'; + break; + case '\u0153': // œ [LATIN SMALL LIGATURE OE] + case '\u1D14': // ᴔ [LATIN SMALL LETTER TURNED OE] + output[outputPos++] = 'o'; + output[outputPos++] = 'e'; + break; + case '\uA74F': // ꝏ [LATIN SMALL LETTER OO] + output[outputPos++] = 'o'; + output[outputPos++] = 'o'; + break; + case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] + output[outputPos++] = 'o'; + output[outputPos++] = 'u'; + break; + case '\u01A4': // Ƥ [LATIN CAPITAL LETTER P WITH HOOK] + case '\u1D18': // ᴘ [LATIN LETTER SMALL CAPITAL P] + case '\u1E54': // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] + case '\u1E56': // Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] + case '\u24C5': // Ⓟ [CIRCLED LATIN CAPITAL LETTER P] + case '\u2C63': // Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] + case '\uA750': // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] + case '\uA752': // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] + case '\uA754': // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] + case '\uFF30': // P [FULLWIDTH LATIN CAPITAL LETTER P] + output[outputPos++] = 'P'; + break; + case '\u01A5': // ƥ [LATIN SMALL LETTER P WITH HOOK] + case 
'\u1D71': // ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] + case '\u1D7D': // ᵽ [LATIN SMALL LETTER P WITH STROKE] + case '\u1D88': // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] + case '\u1E55': // ṕ [LATIN SMALL LETTER P WITH ACUTE] + case '\u1E57': // ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] + case '\u24DF': // ⓟ [CIRCLED LATIN SMALL LETTER P] + case '\uA751': // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] + case '\uA753': // ꝓ [LATIN SMALL LETTER P WITH FLOURISH] + case '\uA755': // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] + case '\uA7FC': // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] + case '\uFF50': // p [FULLWIDTH LATIN SMALL LETTER P] + output[outputPos++] = 'p'; + break; + case '\u24AB': // ⒫ [PARENTHESIZED LATIN SMALL LETTER P] + output[outputPos++] = '('; + output[outputPos++] = 'p'; + output[outputPos++] = ')'; + break; + case '\u024A': // Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] + case '\u24C6': // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] + case '\uA756': // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] + case '\uA758': // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] + case '\uFF31': // Q [FULLWIDTH LATIN CAPITAL LETTER Q] + output[outputPos++] = 'Q'; + break; + case '\u0138': // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] + case '\u024B': // ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] + case '\u02A0': // ʠ [LATIN SMALL LETTER Q WITH HOOK] + case '\u24E0': // ⓠ [CIRCLED LATIN SMALL LETTER Q] + case '\uA757': // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] + case '\uA759': // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] + case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q] + output[outputPos++] = 'q'; + break; + case '\u24AC': // ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] + output[outputPos++] = '('; + output[outputPos++] = 'q'; + output[outputPos++] = ')'; + break; + case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH] + output[outputPos++] = 'q'; + output[outputPos++] = 'p'; + break; + case '\u0154': // 
Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] + case '\u0156': // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] + case '\u0158': // Ř [LATIN CAPITAL LETTER R WITH CARON] + case '\u0210': // Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] + case '\u0212': // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] + case '\u024C': // Ɍ [LATIN CAPITAL LETTER R WITH STROKE] + case '\u0280': // ʀ [LATIN LETTER SMALL CAPITAL R] + case '\u0281': // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] + case '\u1D19': // ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] + case '\u1D1A': // ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] + case '\u1E58': // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] + case '\u1E5A': // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] + case '\u1E5C': // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] + case '\u1E5E': // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] + case '\u24C7': // Ⓡ [CIRCLED LATIN CAPITAL LETTER R] + case '\u2C64': // Ɽ [LATIN CAPITAL LETTER R WITH TAIL] + case '\uA75A': // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] + case '\uA782': // Ꞃ [LATIN CAPITAL LETTER INSULAR R] + case '\uFF32': // R [FULLWIDTH LATIN CAPITAL LETTER R] + output[outputPos++] = 'R'; + break; + case '\u0155': // ŕ [LATIN SMALL LETTER R WITH ACUTE] + case '\u0157': // ŗ [LATIN SMALL LETTER R WITH CEDILLA] + case '\u0159': // ř [LATIN SMALL LETTER R WITH CARON] + case '\u0211': // ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] + case '\u0213': // ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] + case '\u024D': // ɍ [LATIN SMALL LETTER R WITH STROKE] + case '\u027C': // ɼ [LATIN SMALL LETTER R WITH LONG LEG] + case '\u027D': // ɽ [LATIN SMALL LETTER R WITH TAIL] + case '\u027E': // ɾ [LATIN SMALL LETTER R WITH FISHHOOK] + case '\u027F': // ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] + case '\u1D63': // ᵣ [LATIN SUBSCRIPT SMALL LETTER R] + case '\u1D72': // ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] + case '\u1D73': // ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] + case '\u1D89': // ᶉ [LATIN SMALL LETTER R WITH 
PALATAL HOOK] + case '\u1E59': // ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] + case '\u1E5B': // ṛ [LATIN SMALL LETTER R WITH DOT BELOW] + case '\u1E5D': // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] + case '\u1E5F': // ṟ [LATIN SMALL LETTER R WITH LINE BELOW] + case '\u24E1': // ⓡ [CIRCLED LATIN SMALL LETTER R] + case '\uA75B': // ꝛ [LATIN SMALL LETTER R ROTUNDA] + case '\uA783': // ꞃ [LATIN SMALL LETTER INSULAR R] + case '\uFF52': // r [FULLWIDTH LATIN SMALL LETTER R] + output[outputPos++] = 'r'; + break; + case '\u24AD': // ⒭ [PARENTHESIZED LATIN SMALL LETTER R] + output[outputPos++] = '('; + output[outputPos++] = 'r'; + output[outputPos++] = ')'; + break; + case '\u015A': // Ś [LATIN CAPITAL LETTER S WITH ACUTE] + case '\u015C': // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] + case '\u015E': // Ş [LATIN CAPITAL LETTER S WITH CEDILLA] + case '\u0160': // Š [LATIN CAPITAL LETTER S WITH CARON] + case '\u0218': // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] + case '\u1E60': // Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE] + case '\u1E62': // Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] + case '\u1E64': // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] + case '\u1E66': // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] + case '\u1E68': // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] + case '\u24C8': // Ⓢ [CIRCLED LATIN CAPITAL LETTER S] + case '\uA731': // ꜱ [LATIN LETTER SMALL CAPITAL S] + case '\uA785': // ꞅ [LATIN SMALL LETTER INSULAR S] + case '\uFF33': // S [FULLWIDTH LATIN CAPITAL LETTER S] + output[outputPos++] = 'S'; + break; + case '\u015B': // ś [LATIN SMALL LETTER S WITH ACUTE] + case '\u015D': // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] + case '\u015F': // ş [LATIN SMALL LETTER S WITH CEDILLA] + case '\u0161': // š [LATIN SMALL LETTER S WITH CARON] + case '\u017F': // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] + case '\u0219': // ș [LATIN SMALL LETTER S WITH COMMA BELOW] + case '\u023F': // ȿ [LATIN SMALL LETTER S 
WITH SWASH TAIL] + case '\u0282': // ʂ [LATIN SMALL LETTER S WITH HOOK] + case '\u1D74': // ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] + case '\u1D8A': // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] + case '\u1E61': // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] + case '\u1E63': // ṣ [LATIN SMALL LETTER S WITH DOT BELOW] + case '\u1E65': // ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] + case '\u1E67': // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] + case '\u1E69': // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] + case '\u1E9C': // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] + case '\u1E9D': // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] + case '\u24E2': // ⓢ [CIRCLED LATIN SMALL LETTER S] + case '\uA784': // Ꞅ [LATIN CAPITAL LETTER INSULAR S] + case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S] + output[outputPos++] = 's'; + break; + case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S] + output[outputPos++] = 'S'; + output[outputPos++] = 'S'; + break; + case '\u24AE': // ⒮ [PARENTHESIZED LATIN SMALL LETTER S] + output[outputPos++] = '('; + output[outputPos++] = 's'; + output[outputPos++] = ')'; + break; + case '\u00DF': // ß [LATIN SMALL LETTER SHARP S] + output[outputPos++] = 's'; + output[outputPos++] = 's'; + break; + case '\uFB06': // st [LATIN SMALL LIGATURE ST] + output[outputPos++] = 's'; + output[outputPos++] = 't'; + break; + case '\u0162': // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] + case '\u0164': // Ť [LATIN CAPITAL LETTER T WITH CARON] + case '\u0166': // Ŧ [LATIN CAPITAL LETTER T WITH STROKE] + case '\u01AC': // Ƭ [LATIN CAPITAL LETTER T WITH HOOK] + case '\u01AE': // Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] + case '\u021A': // Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] + case '\u023E': // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] + case '\u1D1B': // ᴛ [LATIN LETTER SMALL CAPITAL T] + case '\u1E6A': // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] + case '\u1E6C': // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] + case 
'\u1E6E': // Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] + case '\u1E70': // Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] + case '\u24C9': // Ⓣ [CIRCLED LATIN CAPITAL LETTER T] + case '\uA786': // Ꞇ [LATIN CAPITAL LETTER INSULAR T] + case '\uFF34': // T [FULLWIDTH LATIN CAPITAL LETTER T] + output[outputPos++] = 'T'; + break; + case '\u0163': // ţ [LATIN SMALL LETTER T WITH CEDILLA] + case '\u0165': // ť [LATIN SMALL LETTER T WITH CARON] + case '\u0167': // ŧ [LATIN SMALL LETTER T WITH STROKE] + case '\u01AB': // ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] + case '\u01AD': // ƭ [LATIN SMALL LETTER T WITH HOOK] + case '\u021B': // ț [LATIN SMALL LETTER T WITH COMMA BELOW] + case '\u0236': // ȶ [LATIN SMALL LETTER T WITH CURL] + case '\u0287': // ʇ [LATIN SMALL LETTER TURNED T] + case '\u0288': // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] + case '\u1D75': // ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] + case '\u1E6B': // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] + case '\u1E6D': // ṭ [LATIN SMALL LETTER T WITH DOT BELOW] + case '\u1E6F': // ṯ [LATIN SMALL LETTER T WITH LINE BELOW] + case '\u1E71': // ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] + case '\u1E97': // ẗ [LATIN SMALL LETTER T WITH DIAERESIS] + case '\u24E3': // ⓣ [CIRCLED LATIN SMALL LETTER T] + case '\u2C66': // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] + case '\uFF54': // t [FULLWIDTH LATIN SMALL LETTER T] + output[outputPos++] = 't'; + break; + case '\u00DE': // Þ [LATIN CAPITAL LETTER THORN] + case '\uA766': // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[outputPos++] = 'T'; + output[outputPos++] = 'H'; + break; + case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ] + output[outputPos++] = 'T'; + output[outputPos++] = 'Z'; + break; + case '\u24AF': // ⒯ [PARENTHESIZED LATIN SMALL LETTER T] + output[outputPos++] = '('; + output[outputPos++] = 't'; + output[outputPos++] = ')'; + break; + case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] + output[outputPos++] = 't'; + 
output[outputPos++] = 'c'; + break; + case '\u00FE': // þ [LATIN SMALL LETTER THORN] + case '\u1D7A': // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] + case '\uA767': // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[outputPos++] = 't'; + output[outputPos++] = 'h'; + break; + case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH] + output[outputPos++] = 't'; + output[outputPos++] = 's'; + break; + case '\uA729': // ꜩ [LATIN SMALL LETTER TZ] + output[outputPos++] = 't'; + output[outputPos++] = 'z'; + break; + case '\u00D9': // Ù [LATIN CAPITAL LETTER U WITH GRAVE] + case '\u00DA': // Ú [LATIN CAPITAL LETTER U WITH ACUTE] + case '\u00DB': // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] + case '\u00DC': // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] + case '\u0168': // Ũ [LATIN CAPITAL LETTER U WITH TILDE] + case '\u016A': // Ū [LATIN CAPITAL LETTER U WITH MACRON] + case '\u016C': // Ŭ [LATIN CAPITAL LETTER U WITH BREVE] + case '\u016E': // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] + case '\u0170': // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] + case '\u0172': // Ų [LATIN CAPITAL LETTER U WITH OGONEK] + case '\u01AF': // Ư [LATIN CAPITAL LETTER U WITH HORN] + case '\u01D3': // Ǔ [LATIN CAPITAL LETTER U WITH CARON] + case '\u01D5': // Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] + case '\u01D7': // Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] + case '\u01D9': // Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] + case '\u01DB': // Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] + case '\u0214': // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] + case '\u0216': // Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] + case '\u0244': // Ʉ [LATIN CAPITAL LETTER U BAR] + case '\u1D1C': // ᴜ [LATIN LETTER SMALL CAPITAL U] + case '\u1D7E': // ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] + case '\u1E72': // Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] + case '\u1E74': // Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] + case '\u1E76': // Ṷ [LATIN 
CAPITAL LETTER U WITH CIRCUMFLEX BELOW] + case '\u1E78': // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] + case '\u1E7A': // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] + case '\u1EE4': // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] + case '\u1EE6': // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] + case '\u1EE8': // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] + case '\u1EEA': // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] + case '\u1EEC': // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] + case '\u1EEE': // Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] + case '\u1EF0': // Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] + case '\u24CA': // Ⓤ [CIRCLED LATIN CAPITAL LETTER U] + case '\uFF35': // U [FULLWIDTH LATIN CAPITAL LETTER U] + output[outputPos++] = 'U'; + break; + case '\u00F9': // ù [LATIN SMALL LETTER U WITH GRAVE] + case '\u00FA': // ú [LATIN SMALL LETTER U WITH ACUTE] + case '\u00FB': // û [LATIN SMALL LETTER U WITH CIRCUMFLEX] + case '\u00FC': // ü [LATIN SMALL LETTER U WITH DIAERESIS] + case '\u0169': // ũ [LATIN SMALL LETTER U WITH TILDE] + case '\u016B': // ū [LATIN SMALL LETTER U WITH MACRON] + case '\u016D': // ŭ [LATIN SMALL LETTER U WITH BREVE] + case '\u016F': // ů [LATIN SMALL LETTER U WITH RING ABOVE] + case '\u0171': // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] + case '\u0173': // ų [LATIN SMALL LETTER U WITH OGONEK] + case '\u01B0': // ư [LATIN SMALL LETTER U WITH HORN] + case '\u01D4': // ǔ [LATIN SMALL LETTER U WITH CARON] + case '\u01D6': // ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] + case '\u01D8': // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] + case '\u01DA': // ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] + case '\u01DC': // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] + case '\u0215': // ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] + case '\u0217': // ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] + case '\u0289': // ʉ [LATIN SMALL LETTER U BAR] + case '\u1D64': // ᵤ [LATIN SUBSCRIPT SMALL 
LETTER U] + case '\u1D99': // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] + case '\u1E73': // ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] + case '\u1E75': // ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] + case '\u1E77': // ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] + case '\u1E79': // ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] + case '\u1E7B': // ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] + case '\u1EE5': // ụ [LATIN SMALL LETTER U WITH DOT BELOW] + case '\u1EE7': // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] + case '\u1EE9': // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] + case '\u1EEB': // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] + case '\u1EED': // ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] + case '\u1EEF': // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] + case '\u1EF1': // ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] + case '\u24E4': // ⓤ [CIRCLED LATIN SMALL LETTER U] + case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U] + output[outputPos++] = 'u'; + break; + case '\u24B0': // ⒰ [PARENTHESIZED LATIN SMALL LETTER U] + output[outputPos++] = '('; + output[outputPos++] = 'u'; + output[outputPos++] = ')'; + break; + case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE] + output[outputPos++] = 'u'; + output[outputPos++] = 'e'; + break; + case '\u01B2': // Ʋ [LATIN CAPITAL LETTER V WITH HOOK] + case '\u0245': // Ʌ [LATIN CAPITAL LETTER TURNED V] + case '\u1D20': // ᴠ [LATIN LETTER SMALL CAPITAL V] + case '\u1E7C': // Ṽ [LATIN CAPITAL LETTER V WITH TILDE] + case '\u1E7E': // Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] + case '\u1EFC': // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] + case '\u24CB': // Ⓥ [CIRCLED LATIN CAPITAL LETTER V] + case '\uA75E': // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] + case '\uA768': // Ꝩ [LATIN CAPITAL LETTER VEND] + case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V] + output[outputPos++] = 'V'; + break; + case '\u028B': // ʋ [LATIN SMALL LETTER V WITH HOOK] + case '\u028C': // ʌ [LATIN SMALL LETTER TURNED V] + 
case '\u1D65': // ᵥ [LATIN SUBSCRIPT SMALL LETTER V] + case '\u1D8C': // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] + case '\u1E7D': // ṽ [LATIN SMALL LETTER V WITH TILDE] + case '\u1E7F': // ṿ [LATIN SMALL LETTER V WITH DOT BELOW] + case '\u24E5': // ⓥ [CIRCLED LATIN SMALL LETTER V] + case '\u2C71': // ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] + case '\u2C74': // ⱴ [LATIN SMALL LETTER V WITH CURL] + case '\uA75F': // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] + case '\uFF56': // v [FULLWIDTH LATIN SMALL LETTER V] + output[outputPos++] = 'v'; + break; + case '\uA760': // Ꝡ [LATIN CAPITAL LETTER VY] + output[outputPos++] = 'V'; + output[outputPos++] = 'Y'; + break; + case '\u24B1': // ⒱ [PARENTHESIZED LATIN SMALL LETTER V] + output[outputPos++] = '('; + output[outputPos++] = 'v'; + output[outputPos++] = ')'; + break; + case '\uA761': // ꝡ [LATIN SMALL LETTER VY] + output[outputPos++] = 'v'; + output[outputPos++] = 'y'; + break; + case '\u0174': // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] + case '\u01F7': // Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] + case '\u1D21': // ᴡ [LATIN LETTER SMALL CAPITAL W] + case '\u1E80': // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] + case '\u1E82': // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] + case '\u1E84': // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] + case '\u1E86': // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] + case '\u1E88': // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] + case '\u24CC': // Ⓦ [CIRCLED LATIN CAPITAL LETTER W] + case '\u2C72': // Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] + case '\uFF37': // W [FULLWIDTH LATIN CAPITAL LETTER W] + output[outputPos++] = 'W'; + break; + case '\u0175': // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] + case '\u01BF': // ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] + case '\u028D': // ʍ [LATIN SMALL LETTER TURNED W] + case '\u1E81': // ẁ [LATIN SMALL LETTER W WITH GRAVE] + case '\u1E83': // ẃ [LATIN SMALL LETTER W WITH ACUTE] + case '\u1E85': // ẅ [LATIN SMALL LETTER W 
WITH DIAERESIS] + case '\u1E87': // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] + case '\u1E89': // ẉ [LATIN SMALL LETTER W WITH DOT BELOW] + case '\u1E98': // ẘ [LATIN SMALL LETTER W WITH RING ABOVE] + case '\u24E6': // ⓦ [CIRCLED LATIN SMALL LETTER W] + case '\u2C73': // ⱳ [LATIN SMALL LETTER W WITH HOOK] + case '\uFF57': // w [FULLWIDTH LATIN SMALL LETTER W] + output[outputPos++] = 'w'; + break; + case '\u24B2': // ⒲ [PARENTHESIZED LATIN SMALL LETTER W] + output[outputPos++] = '('; + output[outputPos++] = 'w'; + output[outputPos++] = ')'; + break; + case '\u1E8A': // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] + case '\u1E8C': // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] + case '\u24CD': // Ⓧ [CIRCLED LATIN CAPITAL LETTER X] + case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X] + output[outputPos++] = 'X'; + break; + case '\u1D8D': // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] + case '\u1E8B': // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] + case '\u1E8D': // ẍ [LATIN SMALL LETTER X WITH DIAERESIS] + case '\u2093': // ₓ [LATIN SUBSCRIPT SMALL LETTER X] + case '\u24E7': // ⓧ [CIRCLED LATIN SMALL LETTER X] + case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X] + output[outputPos++] = 'x'; + break; + case '\u24B3': // ⒳ [PARENTHESIZED LATIN SMALL LETTER X] + output[outputPos++] = '('; + output[outputPos++] = 'x'; + output[outputPos++] = ')'; + break; + case '\u00DD': // Ý [LATIN CAPITAL LETTER Y WITH ACUTE] + case '\u0176': // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] + case '\u0178': // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] + case '\u01B3': // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] + case '\u0232': // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] + case '\u024E': // Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] + case '\u028F': // ʏ [LATIN LETTER SMALL CAPITAL Y] + case '\u1E8E': // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] + case '\u1EF2': // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] + case '\u1EF4': // Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] + case '\u1EF6': // Ỷ [LATIN CAPITAL 
LETTER Y WITH HOOK ABOVE] + case '\u1EF8': // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] + case '\u1EFE': // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] + case '\u24CE': // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] + case '\uFF39': // Y [FULLWIDTH LATIN CAPITAL LETTER Y] + output[outputPos++] = 'Y'; + break; + case '\u00FD': // ý [LATIN SMALL LETTER Y WITH ACUTE] + case '\u00FF': // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] + case '\u0177': // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] + case '\u01B4': // ƴ [LATIN SMALL LETTER Y WITH HOOK] + case '\u0233': // ȳ [LATIN SMALL LETTER Y WITH MACRON] + case '\u024F': // ɏ [LATIN SMALL LETTER Y WITH STROKE] + case '\u028E': // ʎ [LATIN SMALL LETTER TURNED Y] + case '\u1E8F': // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] + case '\u1E99': // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] + case '\u1EF3': // ỳ [LATIN SMALL LETTER Y WITH GRAVE] + case '\u1EF5': // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] + case '\u1EF7': // ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] + case '\u1EF9': // ỹ [LATIN SMALL LETTER Y WITH TILDE] + case '\u1EFF': // ỿ [LATIN SMALL LETTER Y WITH LOOP] + case '\u24E8': // ⓨ [CIRCLED LATIN SMALL LETTER Y] + case '\uFF59': // y [FULLWIDTH LATIN SMALL LETTER Y] + output[outputPos++] = 'y'; + break; + case '\u24B4': // ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] + output[outputPos++] = '('; + output[outputPos++] = 'y'; + output[outputPos++] = ')'; + break; + case '\u0179': // Ź [LATIN CAPITAL LETTER Z WITH ACUTE] + case '\u017B': // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] + case '\u017D': // Ž [LATIN CAPITAL LETTER Z WITH CARON] + case '\u01B5': // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] + case '\u021C': // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] + case '\u0224': // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] + case '\u1D22': // ᴢ [LATIN LETTER SMALL CAPITAL Z] + case '\u1E90': // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] + case '\u1E92': // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] + case '\u1E94': // Ẕ [LATIN CAPITAL LETTER 
Z WITH LINE BELOW] + case '\u24CF': // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] + case '\u2C6B': // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] + case '\uA762': // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] + case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z] + output[outputPos++] = 'Z'; + break; + case '\u017A': // ź [LATIN SMALL LETTER Z WITH ACUTE] + case '\u017C': // ż [LATIN SMALL LETTER Z WITH DOT ABOVE] + case '\u017E': // ž [LATIN SMALL LETTER Z WITH CARON] + case '\u01B6': // ƶ [LATIN SMALL LETTER Z WITH STROKE] + case '\u021D': // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] + case '\u0225': // ȥ [LATIN SMALL LETTER Z WITH HOOK] + case '\u0240': // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] + case '\u0290': // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] + case '\u0291': // ʑ [LATIN SMALL LETTER Z WITH CURL] + case '\u1D76': // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] + case '\u1D8E': // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] + case '\u1E91': // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] + case '\u1E93': // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] + case '\u1E95': // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] + case '\u24E9': // ⓩ [CIRCLED LATIN SMALL LETTER Z] + case '\u2C6C': // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] + case '\uA763': // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] + case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z] + output[outputPos++] = 'z'; + break; + case '\u24B5': // ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] + output[outputPos++] = '('; + output[outputPos++] = 'z'; + output[outputPos++] = ')'; + break; + case '\u2070': // ⁰ [SUPERSCRIPT ZERO] + case '\u2080': // ₀ [SUBSCRIPT ZERO] + case '\u24EA': // ⓪ [CIRCLED DIGIT ZERO] + case '\u24FF': // ⓿ [NEGATIVE CIRCLED DIGIT ZERO] + case '\uFF10': // 0 [FULLWIDTH DIGIT ZERO] + output[outputPos++] = '0'; + break; + case '\u00B9': // ¹ [SUPERSCRIPT ONE] + case '\u2081': // ₁ [SUBSCRIPT ONE] + case '\u2460': // ① [CIRCLED DIGIT ONE] + case '\u24F5': // ⓵ [DOUBLE CIRCLED DIGIT ONE] + case 
'\u2776': // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] + case '\u2780': // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] + case '\u278A': // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] + case '\uFF11': // 1 [FULLWIDTH DIGIT ONE] + output[outputPos++] = '1'; + break; + case '\u2488': // ⒈ [DIGIT ONE FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '.'; + break; + case '\u2474': // ⑴ [PARENTHESIZED DIGIT ONE] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = ')'; + break; + case '\u00B2': // ² [SUPERSCRIPT TWO] + case '\u2082': // ₂ [SUBSCRIPT TWO] + case '\u2461': // ② [CIRCLED DIGIT TWO] + case '\u24F6': // ⓶ [DOUBLE CIRCLED DIGIT TWO] + case '\u2777': // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] + case '\u2781': // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] + case '\u278B': // ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] + case '\uFF12': // 2 [FULLWIDTH DIGIT TWO] + output[outputPos++] = '2'; + break; + case '\u2489': // ⒉ [DIGIT TWO FULL STOP] + output[outputPos++] = '2'; + output[outputPos++] = '.'; + break; + case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO] + output[outputPos++] = '('; + output[outputPos++] = '2'; + output[outputPos++] = ')'; + break; + case '\u00B3': // ³ [SUPERSCRIPT THREE] + case '\u2083': // ₃ [SUBSCRIPT THREE] + case '\u2462': // ③ [CIRCLED DIGIT THREE] + case '\u24F7': // ⓷ [DOUBLE CIRCLED DIGIT THREE] + case '\u2778': // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] + case '\u2782': // ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] + case '\u278C': // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] + case '\uFF13': // 3 [FULLWIDTH DIGIT THREE] + output[outputPos++] = '3'; + break; + case '\u248A': // ⒊ [DIGIT THREE FULL STOP] + output[outputPos++] = '3'; + output[outputPos++] = '.'; + break; + case '\u2476': // ⑶ [PARENTHESIZED DIGIT THREE] + output[outputPos++] = '('; + output[outputPos++] = '3'; + output[outputPos++] = ')'; + break; + case '\u2074': // ⁴ [SUPERSCRIPT FOUR] + case '\u2084': // ₄ 
[SUBSCRIPT FOUR] + case '\u2463': // ④ [CIRCLED DIGIT FOUR] + case '\u24F8': // ⓸ [DOUBLE CIRCLED DIGIT FOUR] + case '\u2779': // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] + case '\u2783': // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] + case '\u278D': // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] + case '\uFF14': // 4 [FULLWIDTH DIGIT FOUR] + output[outputPos++] = '4'; + break; + case '\u248B': // ⒋ [DIGIT FOUR FULL STOP] + output[outputPos++] = '4'; + output[outputPos++] = '.'; + break; + case '\u2477': // ⑷ [PARENTHESIZED DIGIT FOUR] + output[outputPos++] = '('; + output[outputPos++] = '4'; + output[outputPos++] = ')'; + break; + case '\u2075': // ⁵ [SUPERSCRIPT FIVE] + case '\u2085': // ₅ [SUBSCRIPT FIVE] + case '\u2464': // ⑤ [CIRCLED DIGIT FIVE] + case '\u24F9': // ⓹ [DOUBLE CIRCLED DIGIT FIVE] + case '\u277A': // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] + case '\u2784': // ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] + case '\u278E': // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] + case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE] + output[outputPos++] = '5'; + break; + case '\u248C': // ⒌ [DIGIT FIVE FULL STOP] + output[outputPos++] = '5'; + output[outputPos++] = '.'; + break; + case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE] + output[outputPos++] = '('; + output[outputPos++] = '5'; + output[outputPos++] = ')'; + break; + case '\u2076': // ⁶ [SUPERSCRIPT SIX] + case '\u2086': // ₆ [SUBSCRIPT SIX] + case '\u2465': // ⑥ [CIRCLED DIGIT SIX] + case '\u24FA': // ⓺ [DOUBLE CIRCLED DIGIT SIX] + case '\u277B': // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] + case '\u2785': // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] + case '\u278F': // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] + case '\uFF16': // 6 [FULLWIDTH DIGIT SIX] + output[outputPos++] = '6'; + break; + case '\u248D': // ⒍ [DIGIT SIX FULL STOP] + output[outputPos++] = '6'; + output[outputPos++] = '.'; + break; + case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX] + output[outputPos++] = '('; + output[outputPos++] 
= '6'; + output[outputPos++] = ')'; + break; + case '\u2077': // ⁷ [SUPERSCRIPT SEVEN] + case '\u2087': // ₇ [SUBSCRIPT SEVEN] + case '\u2466': // ⑦ [CIRCLED DIGIT SEVEN] + case '\u24FB': // ⓻ [DOUBLE CIRCLED DIGIT SEVEN] + case '\u277C': // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] + case '\u2786': // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] + case '\u2790': // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] + case '\uFF17': // 7 [FULLWIDTH DIGIT SEVEN] + output[outputPos++] = '7'; + break; + case '\u248E': // ⒎ [DIGIT SEVEN FULL STOP] + output[outputPos++] = '7'; + output[outputPos++] = '.'; + break; + case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN] + output[outputPos++] = '('; + output[outputPos++] = '7'; + output[outputPos++] = ')'; + break; + case '\u2078': // ⁸ [SUPERSCRIPT EIGHT] + case '\u2088': // ₈ [SUBSCRIPT EIGHT] + case '\u2467': // ⑧ [CIRCLED DIGIT EIGHT] + case '\u24FC': // ⓼ [DOUBLE CIRCLED DIGIT EIGHT] + case '\u277D': // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] + case '\u2787': // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] + case '\u2791': // ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] + case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT] + output[outputPos++] = '8'; + break; + case '\u248F': // ⒏ [DIGIT EIGHT FULL STOP] + output[outputPos++] = '8'; + output[outputPos++] = '.'; + break; + case '\u247B': // ⑻ [PARENTHESIZED DIGIT EIGHT] + output[outputPos++] = '('; + output[outputPos++] = '8'; + output[outputPos++] = ')'; + break; + case '\u2079': // ⁹ [SUPERSCRIPT NINE] + case '\u2089': // ₉ [SUBSCRIPT NINE] + case '\u2468': // ⑨ [CIRCLED DIGIT NINE] + case '\u24FD': // ⓽ [DOUBLE CIRCLED DIGIT NINE] + case '\u277E': // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] + case '\u2788': // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] + case '\u2792': // ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] + case '\uFF19': // 9 [FULLWIDTH DIGIT NINE] + output[outputPos++] = '9'; + break; + case '\u2490': // ⒐ [DIGIT NINE FULL STOP] + output[outputPos++] = '9'; 
+ output[outputPos++] = '.'; + break; + case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE] + output[outputPos++] = '('; + output[outputPos++] = '9'; + output[outputPos++] = ')'; + break; + case '\u2469': // ⑩ [CIRCLED NUMBER TEN] + case '\u24FE': // ⓾ [DOUBLE CIRCLED NUMBER TEN] + case '\u277F': // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] + case '\u2789': // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] + case '\u2793': // ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] + output[outputPos++] = '1'; + output[outputPos++] = '0'; + break; + case '\u2491': // ⒑ [NUMBER TEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '0'; + output[outputPos++] = '.'; + break; + case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '0'; + output[outputPos++] = ')'; + break; + case '\u246A': // ⑪ [CIRCLED NUMBER ELEVEN] + case '\u24EB': // ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] + output[outputPos++] = '1'; + output[outputPos++] = '1'; + break; + case '\u2492': // ⒒ [NUMBER ELEVEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '1'; + output[outputPos++] = '.'; + break; + case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '1'; + output[outputPos++] = ')'; + break; + case '\u246B': // ⑫ [CIRCLED NUMBER TWELVE] + case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] + output[outputPos++] = '1'; + output[outputPos++] = '2'; + break; + case '\u2493': // ⒓ [NUMBER TWELVE FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '2'; + output[outputPos++] = '.'; + break; + case '\u247F': // ⑿ [PARENTHESIZED NUMBER TWELVE] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '2'; + output[outputPos++] = ')'; + break; + case '\u246C': // ⑬ [CIRCLED NUMBER THIRTEEN] + case '\u24ED': // ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '3'; + 
break; + case '\u2494': // ⒔ [NUMBER THIRTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '3'; + output[outputPos++] = '.'; + break; + case '\u2480': // ⒀ [PARENTHESIZED NUMBER THIRTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '3'; + output[outputPos++] = ')'; + break; + case '\u246D': // ⑭ [CIRCLED NUMBER FOURTEEN] + case '\u24EE': // ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '4'; + break; + case '\u2495': // ⒕ [NUMBER FOURTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '4'; + output[outputPos++] = '.'; + break; + case '\u2481': // ⒁ [PARENTHESIZED NUMBER FOURTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '4'; + output[outputPos++] = ')'; + break; + case '\u246E': // ⑮ [CIRCLED NUMBER FIFTEEN] + case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '5'; + break; + case '\u2496': // ⒖ [NUMBER FIFTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '5'; + output[outputPos++] = '.'; + break; + case '\u2482': // ⒂ [PARENTHESIZED NUMBER FIFTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '5'; + output[outputPos++] = ')'; + break; + case '\u246F': // ⑯ [CIRCLED NUMBER SIXTEEN] + case '\u24F0': // ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '6'; + break; + case '\u2497': // ⒗ [NUMBER SIXTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '6'; + output[outputPos++] = '.'; + break; + case '\u2483': // ⒃ [PARENTHESIZED NUMBER SIXTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '6'; + output[outputPos++] = ')'; + break; + case '\u2470': // ⑰ [CIRCLED NUMBER SEVENTEEN] + case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '7'; + break; + case 
'\u2498': // ⒘ [NUMBER SEVENTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '7'; + output[outputPos++] = '.'; + break; + case '\u2484': // ⒄ [PARENTHESIZED NUMBER SEVENTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '7'; + output[outputPos++] = ')'; + break; + case '\u2471': // ⑱ [CIRCLED NUMBER EIGHTEEN] + case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] + output[outputPos++] = '1'; + output[outputPos++] = '8'; + break; + case '\u2499': // ⒙ [NUMBER EIGHTEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '8'; + output[outputPos++] = '.'; + break; + case '\u2485': // ⒅ [PARENTHESIZED NUMBER EIGHTEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '8'; + output[outputPos++] = ')'; + break; + case '\u2472': // ⑲ [CIRCLED NUMBER NINETEEN] + case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] + output[outputPos++] = '1'; + output[outputPos++] = '9'; + break; + case '\u249A': // ⒚ [NUMBER NINETEEN FULL STOP] + output[outputPos++] = '1'; + output[outputPos++] = '9'; + output[outputPos++] = '.'; + break; + case '\u2486': // ⒆ [PARENTHESIZED NUMBER NINETEEN] + output[outputPos++] = '('; + output[outputPos++] = '1'; + output[outputPos++] = '9'; + output[outputPos++] = ')'; + break; + case '\u2473': // ⑳ [CIRCLED NUMBER TWENTY] + case '\u24F4': // ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] + output[outputPos++] = '2'; + output[outputPos++] = '0'; + break; + case '\u249B': // ⒛ [NUMBER TWENTY FULL STOP] + output[outputPos++] = '2'; + output[outputPos++] = '0'; + output[outputPos++] = '.'; + break; + case '\u2487': // ⒇ [PARENTHESIZED NUMBER TWENTY] + output[outputPos++] = '('; + output[outputPos++] = '2'; + output[outputPos++] = '0'; + output[outputPos++] = ')'; + break; + case '\u00AB': // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] + case '\u00BB': // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] + case '\u201C': // “ [LEFT DOUBLE QUOTATION MARK] + case 
'\u201D': // ” [RIGHT DOUBLE QUOTATION MARK] + case '\u201E': // „ [DOUBLE LOW-9 QUOTATION MARK] + case '\u2033': // ″ [DOUBLE PRIME] + case '\u2036': // ‶ [REVERSED DOUBLE PRIME] + case '\u275D': // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] + case '\u275E': // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] + case '\u276E': // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case '\u276F': // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case '\uFF02': // " [FULLWIDTH QUOTATION MARK] + output[outputPos++] = '"'; + break; + case '\u2018': // ‘ [LEFT SINGLE QUOTATION MARK] + case '\u2019': // ’ [RIGHT SINGLE QUOTATION MARK] + case '\u201A': // ‚ [SINGLE LOW-9 QUOTATION MARK] + case '\u201B': // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] + case '\u2032': // ′ [PRIME] + case '\u2035': // ‵ [REVERSED PRIME] + case '\u2039': // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] + case '\u203A': // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] + case '\u275B': // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] + case '\u275C': // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] + case '\uFF07': // ' [FULLWIDTH APOSTROPHE] + output[outputPos++] = '\''; + break; + case '\u2010': // ‐ [HYPHEN] + case '\u2011': // ‑ [NON-BREAKING HYPHEN] + case '\u2012': // ‒ [FIGURE DASH] + case '\u2013': // – [EN DASH] + case '\u2014': // — [EM DASH] + case '\u207B': // ⁻ [SUPERSCRIPT MINUS] + case '\u208B': // ₋ [SUBSCRIPT MINUS] + case '\uFF0D': // - [FULLWIDTH HYPHEN-MINUS] + output[outputPos++] = '-'; + break; + case '\u2045': // ⁅ [LEFT SQUARE BRACKET WITH QUILL] + case '\u2772': // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] + case '\uFF3B': // [ [FULLWIDTH LEFT SQUARE BRACKET] + output[outputPos++] = '['; + break; + case '\u2046': // ⁆ [RIGHT SQUARE BRACKET WITH QUILL] + case '\u2773': // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] + case '\uFF3D': // ] [FULLWIDTH RIGHT SQUARE BRACKET] + output[outputPos++] = ']'; + break; + case '\u207D': // ⁽ 
[SUPERSCRIPT LEFT PARENTHESIS] + case '\u208D': // ₍ [SUBSCRIPT LEFT PARENTHESIS] + case '\u2768': // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] + case '\u276A': // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] + case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS] + output[outputPos++] = '('; + break; + case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS] + output[outputPos++] = '('; + output[outputPos++] = '('; + break; + case '\u207E': // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] + case '\u208E': // ₎ [SUBSCRIPT RIGHT PARENTHESIS] + case '\u2769': // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] + case '\u276B': // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] + case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS] + output[outputPos++] = ')'; + break; + case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS] + output[outputPos++] = ')'; + output[outputPos++] = ')'; + break; + case '\u276C': // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] + case '\u2770': // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] + case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN] + output[outputPos++] = '<'; + break; + case '\u276D': // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case '\u2771': // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN] + output[outputPos++] = '>'; + break; + case '\u2774': // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] + case '\uFF5B': // { [FULLWIDTH LEFT CURLY BRACKET] + output[outputPos++] = '{'; + break; + case '\u2775': // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] + case '\uFF5D': // } [FULLWIDTH RIGHT CURLY BRACKET] + output[outputPos++] = '}'; + break; + case '\u207A': // ⁺ [SUPERSCRIPT PLUS SIGN] + case '\u208A': // ₊ [SUBSCRIPT PLUS SIGN] + case '\uFF0B': // + [FULLWIDTH PLUS SIGN] + output[outputPos++] = '+'; + break; + case '\u207C': // ⁼ [SUPERSCRIPT EQUALS SIGN] + case '\u208C': // ₌ [SUBSCRIPT EQUALS SIGN] + case '\uFF1D': // = [FULLWIDTH EQUALS SIGN] + output[outputPos++] = '='; + break; + case '\uFF01': // ! 
[FULLWIDTH EXCLAMATION MARK] + output[outputPos++] = '!'; + break; + case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK] + output[outputPos++] = '!'; + output[outputPos++] = '!'; + break; + case '\u2049': // ⁉ [EXCLAMATION QUESTION MARK] + output[outputPos++] = '!'; + output[outputPos++] = '?'; + break; + case '\uFF03': // # [FULLWIDTH NUMBER SIGN] + output[outputPos++] = '#'; + break; + case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN] + output[outputPos++] = '$'; + break; + case '\u2052': // ⁒ [COMMERCIAL MINUS SIGN] + case '\uFF05': // % [FULLWIDTH PERCENT SIGN] + output[outputPos++] = '%'; + break; + case '\uFF06': // & [FULLWIDTH AMPERSAND] + output[outputPos++] = '&'; + break; + case '\u204E': // ⁎ [LOW ASTERISK] + case '\uFF0A': // * [FULLWIDTH ASTERISK] + output[outputPos++] = '*'; + break; + case '\uFF0C': // , [FULLWIDTH COMMA] + output[outputPos++] = ','; + break; + case '\uFF0E': // . [FULLWIDTH FULL STOP] + output[outputPos++] = '.'; + break; + case '\u2044': // ⁄ [FRACTION SLASH] + case '\uFF0F': // / [FULLWIDTH SOLIDUS] + output[outputPos++] = '/'; + break; + case '\uFF1A': // : [FULLWIDTH COLON] + output[outputPos++] = ':'; + break; + case '\u204F': // ⁏ [REVERSED SEMICOLON] + case '\uFF1B': // ; [FULLWIDTH SEMICOLON] + output[outputPos++] = ';'; + break; + case '\uFF1F': // ? 
[FULLWIDTH QUESTION MARK] + output[outputPos++] = '?'; + break; + case '\u2047': // ⁇ [DOUBLE QUESTION MARK] + output[outputPos++] = '?'; + output[outputPos++] = '?'; + break; + case '\u2048': // ⁈ [QUESTION EXCLAMATION MARK] + output[outputPos++] = '?'; + output[outputPos++] = '!'; + break; + case '\uFF20': // @ [FULLWIDTH COMMERCIAL AT] + output[outputPos++] = '@'; + break; + case '\uFF3C': // \ [FULLWIDTH REVERSE SOLIDUS] + output[outputPos++] = '\\'; + break; + case '\u2038': // ‸ [CARET] + case '\uFF3E': // ^ [FULLWIDTH CIRCUMFLEX ACCENT] + output[outputPos++] = '^'; + break; + case '\uFF3F': // _ [FULLWIDTH LOW LINE] + output[outputPos++] = '_'; + break; + case '\u2053': // ⁓ [SWUNG DASH] + case '\uFF5E': // ~ [FULLWIDTH TILDE] + output[outputPos++] = '~'; + break; + default: + output[outputPos++] = c; + break; + } + } + } + } +} Property changes on: src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java ___________________________________________________________________ Name: svn:mime-type + text/plain Name: svn:eol-style + native