Index: modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java
===================================================================
--- modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java (revision 0)
+++ modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java (revision 0)
@@ -0,0 +1,91 @@
+package org.apache.lucene.analysis.icu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+
+/**
+ * Generates JFlex macros that augment JFlex's Unicode word-break support
+ * for supplementary characters (code points above the BMP).
+ * Each macro is written to standard output.
+ */
+public class GenerateJFlexSupplementaryMacros {
+  /** All BMP code points; removed from every property set so that only supplementary code points remain. */
+  private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
+
+  /** Prints one supplementary-character macro for each Unicode property listed below. */
+  public static void main(String args[]) throws Exception {
+    outputMacro("ALetterSupp", "[:WordBreak=ALetter:]");
+    outputMacro("FormatSupp", "[:WordBreak=Format:]");
+    outputMacro("ExtendSupp", "[:WordBreak=Extend:]");
+    outputMacro("NumericSupp", "[:WordBreak=Numeric:]");
+    outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]");
+    outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]");
+    outputMacro("MidNumSupp", "[:WordBreak=MidNum:]");
+    outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]");
+    outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]");
+    outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]");
+    outputMacro("HanSupp", "[:Script=Han:]");
+    outputMacro("HiraganaSupp", "[:Script=Hiragana:]");
+  }
+
+  /**
+   * Prints a JFlex macro named {@code name} that matches, as UTF-16 surrogate pairs,
+   * the supplementary code points of the UnicodeSet described by {@code pattern}.
+   *
+   * We have to carefully output the possibilities as compact utf-16
+   * range expressions, or jflex will OOM!
+   *
+   * @param name name of the macro to emit
+   * @param pattern ICU UnicodeSet pattern selecting the code points
+   */
+  static void outputMacro(String name, String pattern) {
+    UnicodeSet set = new UnicodeSet(pattern);
+    set.removeAll(BMP);
+    System.out.println(name + " = (");
+    // if the set is empty, we have to do this or jflex will barf
+    if (set.isEmpty()) {
+      System.out.println("\t []");
+    }
+
+    // group the trail surrogates of every code point under its lead surrogate
+    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
+    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
+      char utf16[] = Character.toChars(it.codepoint);
+      UnicodeSet trails = utf16ByLead.get(utf16[0]);
+      if (trails == null) {
+        trails = new UnicodeSet();
+        utf16ByLead.put(utf16[0], trails);
+      }
+      trails.add(utf16[1]);
+    }
+
+    // one alternative per lead surrogate: [lead] followed by the set of its trails
+    boolean isFirst = true;
+    for (Character c : utf16ByLead.keySet()) {
+      UnicodeSet trail = utf16ByLead.get(c);
+      System.out.print( isFirst ? "\t " : "\t| ");
+      isFirst = false;
+      System.out.println("([\\u" + Integer.toHexString(c) + "]" + trail.getRegexEquivalent() + ")");
+    }
+    System.out.println(")");
+  }
+}

Property changes on: modules\analysis\icu\src\tools\java\org\apache\lucene\analysis\icu\GenerateJFlexSupplementaryMacros.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (revision 1055272)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (working copy)
@@ -201,4 +201,10 @@
     WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
     wordBreakTest.test(a);
   }
+
+  public void testSupplementary() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "𩬅艱鍟䇹愯瀛",
+        new String[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"},
+        new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>" });
+  }
 }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
=================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (revision 1055272) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (working copy) @@ -33,14 +33,6 @@ *
   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  • * - * WARNING: Because JFlex does not support Unicode supplementary - * characters (characters above the Basic Multilingual Plane, which contains - * those up to and including U+FFFF), this scanner will not recognize them - * properly. If you need to be able to process text containing supplementary - * characters, consider using the ICU4J-backed implementation in modules/analysis/icu - * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer) - * instead of this class, since the ICU4J-backed implementation does not have - * this limitation. */ %% @@ -53,15 +45,29 @@ %function getNextToken %char +%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro +ALetter = ([\p{WB:ALetter}] | {ALetterSupp}) +Format = ([\p{WB:Format}] | {FormatSupp}) +Numeric = ([\p{WB:Numeric}] | {NumericSupp}) +Extend = ([\p{WB:Extend}] | {ExtendSupp}) +Katakana = ([\p{WB:Katakana}] | {KatakanaSupp}) +MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp}) +MidNum = ([\p{WB:MidNum}] | {MidNumSupp}) +MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp}) +ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp}) +ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp}) +Han = ([\p{Script:Han}] | {HanSupp}) +Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp}) + // UAX#29 WB4. 
X (Extend | Format)* --> X // -ALetterEx = \p{WB:ALetter} [\p{WB:Format}\p{WB:Extend}]* +ALetterEx = {ALetter} ({Format} | {Extend})* // TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it -NumericEx = [\p{WB:Numeric}\uFF10-\uFF19] [\p{WB:Format}\p{WB:Extend}]* -KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]* -MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]* -MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]* -ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]* +NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})* +KatakanaEx = {Katakana} ({Format} | {Extend})* +MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})* +MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})* +ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* %{ @@ -156,12 +162,12 @@ // // http://www.unicode.org/reports/tr14/#SA // -\p{LB:Complex_Context}+ { return SOUTH_EAST_ASIAN_TYPE; } +{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; } // UAX#29 WB14. Any ÷ Any // -\p{Script:Han} { return IDEOGRAPHIC_TYPE; } -\p{Script:Hiragana} { return HIRAGANA_TYPE; } +{Han} { return IDEOGRAPHIC_TYPE; } +{Hiragana} { return HIRAGANA_TYPE; } // UAX#29 WB3. CR × LF Index: modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (revision 1055272) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (working copy) @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 12/4/10 7:24 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/5/11 1:38 PM */ package org.apache.lucene.analysis.standard; @@ -35,14 +35,6 @@ *
   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  • * - * WARNING: Because JFlex does not support Unicode supplementary - * characters (characters above the Basic Multilingual Plane, which contains - * those up to and including U+FFFF), this scanner will not recognize them - * properly. If you need to be able to process text containing supplementary - * characters, consider using the ICU4J-backed implementation in modules/analysis/icu - * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer) - * instead of this class, since the ICU4J-backed implementation does not have - * this limitation. */ public final class StandardTokenizerImpl implements StandardTokenizerInterface { @@ -70,115 +62,138 @@ * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\47\0\1\7\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6"+ - "\5\0\32\1\4\0\1\10\1\0\32\1\57\0\1\1\2\0\1\2"+ - "\7\0\1\1\1\0\1\5\2\0\1\1\5\0\27\1\1\0\37\1"+ - "\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0\1\1"+ - "\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0\1\1"+ - "\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0\213\1"+ - "\1\0\7\2\236\1\11\0\46\1\2\0\1\1\7\0\47\1\1\0"+ - "\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2\1\0"+ - "\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0\2\6"+ - "\2\0\13\2\5\0\53\1\25\2\12\3\1\0\1\3\1\6\1\0"+ - "\2\1\1\2\143\1\1\0\1\1\10\2\1\0\6\2\2\1\2\2"+ - "\1\0\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1"+ - "\1\2\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1"+ - "\11\2\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1"+ - "\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\244\0\4\2"+ - "\66\1\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0\12\3"+ - "\1\0\7\1\1\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1"+ - "\2\0\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2"+ - "\1\1\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0"+ - "\2\1\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0"+ - "\6\1\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0"+ - "\2\1\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0"+ - "\3\2\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2"+ - "\3\1\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1"+ - "\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2"+ - 
"\1\0\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0"+ - "\12\3\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+ - "\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0"+ - "\2\2\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2"+ - "\2\0\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0"+ - "\3\1\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0"+ - "\2\1\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0"+ - "\4\2\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0"+ - "\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0"+ - "\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1"+ - "\6\0\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0"+ - "\3\1\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1"+ - "\7\2\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0"+ - "\2\1\2\2\2\0\12\3\1\0\2\1\17\0\2\2\1\0\10\1"+ - "\1\0\3\1\1\0\51\1\2\0\1\1\7\2\1\0\3\2\1\0"+ - "\4\2\1\1\10\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0"+ - "\6\1\2\0\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0"+ - "\1\1\2\0\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0"+ - "\10\2\22\0\2\2\15\0\60\11\1\12\2\11\7\12\5\0\7\11"+ - "\10\12\1\0\12\3\47\0\2\11\1\0\1\11\2\0\2\11\1\0"+ - "\1\11\2\0\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0"+ - "\1\11\1\0\1\11\2\0\2\11\1\0\4\11\1\12\2\11\6\12"+ - "\1\0\2\12\1\11\2\0\5\11\1\0\1\11\1\0\6\12\2\0"+ - "\12\3\2\0\2\11\42\0\1\1\27\0\2\2\6\0\12\3\13\0"+ - "\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1"+ - "\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0\1\2"+ - "\71\0\53\11\24\12\1\11\12\3\6\0\6\11\4\12\4\11\3\12"+ - "\1\11\3\12\2\11\7\12\3\11\4\12\15\11\14\12\1\11\1\12"+ - "\12\3\4\12\2\11\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1"+ - "\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1"+ - "\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1"+ - "\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1"+ - "\2\0\3\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1"+ - "\1\0\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1"+ - "\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0"+ - "\3\1\1\0\2\2\14\0\64\11\40\12\3\0\1\11\4\0\1\11"+ - "\1\12\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0"+ - "\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0"+ - "\14\2\12\0\12\3\36\11\2\0\5\11\13\0\54\11\4\0\21\12"+ - 
"\7\11\2\12\6\0\12\3\1\11\3\0\2\11\40\0\27\1\5\2"+ - "\4\0\65\11\12\12\1\0\35\12\2\0\1\2\12\3\6\0\12\3"+ - "\6\0\16\11\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0"+ - "\11\2\14\0\3\2\36\1\12\2\3\0\2\1\12\3\6\0\46\1"+ - "\16\2\14\0\44\1\24\2\10\0\12\3\3\0\3\1\12\3\44\1"+ - "\122\0\3\2\1\0\25\2\4\1\1\2\4\1\1\2\15\0\300\1"+ - "\47\2\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1"+ - "\2\0\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1"+ - "\2\0\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1"+ - "\3\0\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1"+ - "\17\0\4\2\10\0\2\7\12\0\1\7\2\0\1\5\2\0\5\2"+ - "\20\0\2\10\3\0\1\6\17\0\1\10\13\0\5\2\5\0\6\2"+ - "\1\0\1\1\15\0\1\1\20\0\15\1\63\0\41\2\21\0\1\1"+ - "\4\0\1\1\2\0\12\1\1\0\1\1\3\0\5\1\6\0\1\1"+ - "\1\0\1\1\1\0\1\1\1\0\4\1\1\0\13\1\2\0\4\1"+ - "\5\0\5\1\4\0\1\1\21\0\51\1\u032d\0\64\1\u0716\0\57\1"+ - "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\16\0\46\1\12\0"+ - "\66\1\11\0\1\1\17\0\1\2\27\1\11\0\7\1\1\0\7\1"+ - "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+ - "\1\0\7\1\1\0\40\2\57\0\1\1\120\0\32\13\1\0\131\13"+ - "\14\0\326\13\57\0\1\1\1\0\1\13\31\0\11\13\6\2\1\0"+ - "\5\4\2\0\3\13\1\1\1\1\4\0\126\14\2\0\2\2\2\4"+ - "\3\14\133\4\1\0\4\4\5\0\51\1\3\0\136\1\21\0\33\1"+ - "\65\0\20\4\320\0\57\4\1\0\130\4\250\0\u19b6\13\112\0\u51cc\13"+ - "\64\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\3\2\1"+ - "\24\0\57\1\4\2\11\0\2\2\1\0\31\1\10\0\120\1\2\2"+ - "\45\0\11\1\2\0\147\1\2\0\4\1\1\0\2\1\16\0\12\1"+ - "\120\0\10\1\1\2\3\1\1\2\4\1\1\2\27\1\5\2\30\0"+ - "\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0\22\2\6\1"+ - "\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1\15\2\14\0"+ - "\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3\46\0\51\1"+ - "\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3\6\0\33\11"+ - "\1\12\4\0\60\11\1\12\1\11\3\12\2\11\2\12\5\11\2\12"+ - "\1\11\1\12\1\11\30\0\5\11\41\0\6\1\2\0\6\1\2\0"+ - "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+ - "\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u012e\13"+ - "\2\0\76\13\2\0\152\13\46\0\7\1\14\0\5\1\5\0\1\1"+ - "\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1\1\0\2\1"+ - 
"\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0\66\1"+ - "\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6\13\0\7\2"+ - "\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0\1\6\1\5"+ - "\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7\4\0\1\6"+ - "\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1\4\0\1\10"+ - "\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1\2\0\6\1"+ - "\2\0\6\1\2\0\3\1\34\0\3\2\4\0"; + "\47\0\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137"+ + "\5\0\32\132\4\0\1\141\1\0\32\132\57\0\1\132\2\0\1\133"+ + "\7\0\1\132\1\0\1\136\2\0\1\132\5\0\27\132\1\0\37\132"+ + "\1\0\u01ca\132\4\0\14\132\16\0\5\132\7\0\1\132\1\0\1\132"+ + "\21\0\160\133\5\132\1\0\2\132\2\0\4\132\1\137\7\0\1\132"+ + "\1\136\3\132\1\0\1\132\1\0\24\132\1\0\123\132\1\0\213\132"+ + "\1\0\7\133\236\132\11\0\46\132\2\0\1\132\7\0\47\132\1\0"+ + "\1\137\7\0\55\133\1\0\1\133\1\0\2\133\1\0\2\133\1\0"+ + "\1\133\10\0\33\132\5\0\4\132\1\136\13\0\4\133\10\0\2\137"+ + "\2\0\13\133\5\0\53\132\25\133\12\134\1\0\1\134\1\137\1\0"+ + "\2\132\1\133\143\132\1\0\1\132\7\133\1\133\1\0\6\133\2\132"+ + "\2\133\1\0\4\133\2\132\12\134\3\132\2\0\1\132\17\0\1\133"+ + "\1\132\1\133\36\132\33\133\2\0\131\132\13\133\1\132\16\0\12\134"+ + "\41\132\11\133\2\132\2\0\1\137\1\0\1\132\5\0\26\132\4\133"+ + "\1\132\11\133\1\132\3\133\1\132\5\133\22\0\31\132\3\133\244\0"+ + "\4\133\66\132\3\133\1\132\22\133\1\132\7\133\12\132\2\133\2\0"+ + "\12\134\1\0\7\132\1\0\7\132\1\0\3\133\1\0\10\132\2\0"+ + "\2\132\2\0\26\132\1\0\7\132\1\0\1\132\3\0\4\132\2\0"+ + "\1\133\1\132\7\133\2\0\2\133\2\0\3\133\1\132\10\0\1\133"+ + "\4\0\2\132\1\0\3\132\2\133\2\0\12\134\2\132\17\0\3\133"+ + "\1\0\6\132\4\0\2\132\2\0\26\132\1\0\7\132\1\0\2\132"+ + "\1\0\2\132\1\0\2\132\2\0\1\133\1\0\5\133\4\0\2\133"+ + "\2\0\3\133\3\0\1\133\7\0\4\132\1\0\1\132\7\0\12\134"+ + "\2\133\3\132\1\133\13\0\3\133\1\0\11\132\1\0\3\132\1\0"+ + "\26\132\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132"+ + "\10\133\1\0\3\133\1\0\3\133\2\0\1\132\17\0\2\132\2\133"+ + "\2\0\12\134\21\0\3\133\1\0\10\132\2\0\2\132\2\0\26\132"+ + 
"\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132\7\133"+ + "\2\0\2\133\2\0\3\133\10\0\2\133\4\0\2\132\1\0\3\132"+ + "\2\133\2\0\12\134\1\0\1\132\20\0\1\133\1\132\1\0\6\132"+ + "\3\0\3\132\1\0\4\132\3\0\2\132\1\0\1\132\1\0\2\132"+ + "\3\0\2\132\3\0\3\132\3\0\14\132\4\0\5\133\3\0\3\133"+ + "\1\0\4\133\2\0\1\132\6\0\1\133\16\0\12\134\21\0\3\133"+ + "\1\0\10\132\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132"+ + "\3\0\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\1\0"+ + "\2\132\6\0\2\132\2\133\2\0\12\134\22\0\2\133\1\0\10\132"+ + "\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132\2\0\1\133"+ + "\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\7\0\1\132"+ + "\1\0\2\132\2\133\2\0\12\134\1\0\2\132\17\0\2\133\1\0"+ + "\10\132\1\0\3\132\1\0\51\132\2\0\1\132\7\133\1\0\3\133"+ + "\1\0\4\133\1\132\10\0\1\133\10\0\2\132\2\133\2\0\12\134"+ + "\12\0\6\132\2\0\2\133\1\0\22\132\3\0\30\132\1\0\11\132"+ + "\1\0\1\132\2\0\7\132\3\0\1\133\4\0\6\133\1\0\1\133"+ + "\1\0\10\133\22\0\2\133\15\0\60\142\1\143\2\142\7\143\5\0"+ + "\7\142\10\143\1\0\12\134\47\0\2\142\1\0\1\142\2\0\2\142"+ + "\1\0\1\142\2\0\1\142\6\0\4\142\1\0\7\142\1\0\3\142"+ + "\1\0\1\142\1\0\1\142\2\0\2\142\1\0\4\142\1\143\2\142"+ + "\6\143\1\0\2\143\1\142\2\0\5\142\1\0\1\142\1\0\6\143"+ + "\2\0\12\134\2\0\2\142\42\0\1\132\27\0\2\133\6\0\12\134"+ + "\13\0\1\133\1\0\1\133\1\0\1\133\4\0\2\133\10\132\1\0"+ + "\44\132\4\0\24\133\1\0\2\133\5\132\13\133\1\0\44\133\11\0"+ + "\1\133\71\0\53\142\24\143\1\142\12\134\6\0\6\142\4\143\4\142"+ + "\3\143\1\142\3\143\2\142\7\143\3\142\4\143\15\142\14\143\1\142"+ + "\1\143\12\134\4\143\2\142\46\132\12\0\53\132\1\0\1\132\3\0"+ + "\u0149\132\1\0\4\132\2\0\7\132\1\0\1\132\1\0\4\132\2\0"+ + "\51\132\1\0\4\132\2\0\41\132\1\0\4\132\2\0\7\132\1\0"+ + "\1\132\1\0\4\132\2\0\17\132\1\0\71\132\1\0\4\132\2\0"+ + "\103\132\2\0\3\133\40\0\20\132\20\0\125\132\14\0\u026c\132\2\0"+ + "\21\132\1\0\32\132\5\0\113\132\3\0\3\132\17\0\15\132\1\0"+ + "\4\132\3\133\13\0\22\132\3\133\13\0\22\132\2\133\14\0\15\132"+ + 
"\1\0\3\132\1\0\2\133\14\0\64\142\2\143\36\143\3\0\1\142"+ + "\4\0\1\142\1\143\2\0\12\134\41\0\3\133\2\0\12\134\6\0"+ + "\130\132\10\0\51\132\1\133\1\132\5\0\106\132\12\0\35\132\3\0"+ + "\14\133\4\0\14\133\12\0\12\134\36\142\2\0\5\142\13\0\54\142"+ + "\4\0\21\143\7\142\2\143\6\0\12\134\1\142\3\0\2\142\40\0"+ + "\27\132\5\133\4\0\65\142\12\143\1\0\35\143\2\0\1\133\12\134"+ + "\6\0\12\134\6\0\16\142\122\0\5\133\57\132\21\133\7\132\4\0"+ + "\12\134\21\0\11\133\14\0\3\133\36\132\12\133\3\0\2\132\12\134"+ + "\6\0\46\132\16\133\14\0\44\132\24\133\10\0\12\134\3\0\3\132"+ + "\12\134\44\132\122\0\3\133\1\0\25\133\4\132\1\133\4\132\1\133"+ + "\15\0\300\132\47\133\25\0\4\133\u0116\132\2\0\6\132\2\0\46\132"+ + "\2\0\6\132\2\0\10\132\1\0\1\132\1\0\1\132\1\0\1\132"+ + "\1\0\37\132\2\0\65\132\1\0\7\132\1\0\1\132\3\0\3\132"+ + "\1\0\7\132\3\0\4\132\2\0\6\132\4\0\15\132\5\0\3\132"+ + "\1\0\7\132\17\0\2\133\2\133\10\0\2\140\12\0\1\140\2\0"+ + "\1\136\2\0\5\133\20\0\2\141\3\0\1\137\17\0\1\141\13\0"+ + "\5\133\5\0\6\133\1\0\1\132\15\0\1\132\20\0\15\132\63\0"+ + "\41\133\21\0\1\132\4\0\1\132\2\0\12\132\1\0\1\132\3\0"+ + "\5\132\6\0\1\132\1\0\1\132\1\0\1\132\1\0\4\132\1\0"+ + "\13\132\2\0\4\132\5\0\5\132\4\0\1\132\21\0\51\132\u032d\0"+ + "\64\132\u0716\0\57\132\1\0\57\132\1\0\205\132\6\0\4\132\3\133"+ + "\16\0\46\132\12\0\66\132\11\0\1\132\17\0\1\133\27\132\11\0"+ + "\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0"+ + "\7\132\1\0\7\132\1\0\7\132\1\0\40\133\57\0\1\132\120\0"+ + "\32\144\1\0\131\144\14\0\326\144\57\0\1\132\1\0\1\144\31\0"+ + "\11\144\6\133\1\0\5\135\2\0\3\144\1\132\1\132\4\0\126\145"+ + "\2\0\2\133\2\135\3\145\133\135\1\0\4\135\5\0\51\132\3\0"+ + "\136\132\21\0\33\132\65\0\20\135\320\0\57\135\1\0\130\135\250\0"+ + "\u19b6\144\112\0\u51cc\144\64\0\u048d\132\103\0\56\132\2\0\u010d\132\3\0"+ + "\20\132\12\134\2\132\24\0\57\132\4\133\11\0\2\133\1\0\31\132"+ + "\10\0\120\132\2\133\45\0\11\132\2\0\147\132\2\0\4\132\1\0"+ + 
"\2\132\16\0\12\132\120\0\10\132\1\133\3\132\1\133\4\132\1\133"+ + "\27\132\5\133\30\0\64\132\14\0\2\133\62\132\21\133\13\0\12\134"+ + "\6\0\22\133\6\132\3\0\1\132\4\0\12\134\34\132\10\133\2\0"+ + "\27\132\15\133\14\0\35\132\3\0\4\133\57\132\16\133\16\0\1\132"+ + "\12\134\46\0\51\132\16\133\11\0\3\132\1\133\10\132\2\133\2\0"+ + "\12\134\6\0\33\142\1\143\4\0\60\142\1\143\1\142\3\143\2\142"+ + "\2\143\5\142\2\143\1\142\1\143\1\142\30\0\5\142\41\0\6\132"+ + "\2\0\6\132\2\0\6\132\11\0\7\132\1\0\7\132\221\0\43\132"+ + "\10\133\1\0\2\133\2\0\12\134\6\0\u2ba4\132\14\0\27\132\4\0"+ + "\61\132\4\0\1\31\1\25\1\46\1\43\1\13\3\0\1\7\1\5"+ + "\2\0\1\3\1\1\14\0\1\11\21\0\1\112\7\0\1\65\1\17"+ + "\6\0\1\130\3\0\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\121\1\120\1\120\1\120\1\125\1\123"+ + "\17\0\1\114\u02c1\0\1\70\277\0\1\113\1\71\1\2\3\124\2\35"+ + "\1\124\1\35\2\124\1\14\21\124\2\60\7\73\1\72\7\73\7\52"+ + "\1\15\1\52\1\75\2\45\1\44\1\75\1\45\1\44\10\75\2\63"+ + "\5\61\2\54\5\61\1\6\10\37\5\21\3\27\12\106\20\27\3\42"+ + "\32\30\1\26\2\24\2\110\1\111\2\110\2\111\2\110\1\111\3\24"+ + "\1\16\2\24\12\64\1\74\1\41\1\34\1\64\6\41\1\34\66\41"+ + "\5\115\6\103\1\51\4\103\2\51\10\103\1\51\7\100\1\12\2\100"+ + "\32\103\1\12\4\100\1\12\5\102\1\101\1\102\3\101\7\102\1\101"+ + "\23\102\5\67\3\102\6\67\2\67\6\66\10\66\2\100\7\66\36\100"+ + "\4\66\102\100\15\115\1\77\2\115\1\131\3\117\1\115\2\117\5\115"+ + "\4\117\4\116\1\115\3\116\1\115\5\116\26\56\4\23\1\105\2\104"+ + "\4\122\1\104\2\122\3\76\33\122\35\55\3\122\35\126\3\122\6\126"+ + "\2\33\31\126\1\33\17\126\6\122\4\22\1\10\37\22\1\10\4\22"+ + "\25\62\1\127\11\62\21\55\5\62\1\57\12\40\13\62\4\55\1\50"+ + "\6\55\12\122\17\55\1\47\3\53\15\20\11\36\1\32\24\36\2\20"+ + 
"\11\36\1\32\31\36\1\32\4\20\4\36\2\32\2\107\1\4\5\107"+ + "\52\4\u1900\0\u012e\144\2\0\76\144\2\0\152\144\46\0\7\132\14\0"+ + "\5\132\5\0\1\132\1\133\12\132\1\0\15\132\1\0\5\132\1\0"+ + "\1\132\1\0\2\132\1\0\2\132\1\0\154\132\41\0\u016b\132\22\0"+ + "\100\132\2\0\66\132\50\0\14\132\4\0\20\133\1\137\2\0\1\136"+ + "\1\137\13\0\7\133\14\0\2\141\30\0\3\141\1\137\1\0\1\140"+ + "\1\0\1\137\1\136\32\0\5\132\1\0\207\132\2\0\1\133\7\0"+ + "\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137\5\0"+ + "\32\132\4\0\1\141\1\0\32\132\13\0\70\135\2\133\37\132\3\0"+ + "\6\132\2\0\6\132\2\0\6\132\2\0\3\132\34\0\3\133\4\0"; /** * Translates characters to character classes @@ -191,11 +206,11 @@ private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - "\1\0\1\1\1\2\1\3\1\2\1\1\1\4\1\5"+ - "\1\6\1\2\1\0\1\2\1\0\1\3\2\0"; + "\1\0\23\1\1\2\1\3\1\2\1\1\1\4\1\5"+ + "\1\6\15\0\1\2\1\0\1\2\10\0\1\3\61\0"; private static int [] zzUnpackAction() { - int [] result = new int[16]; + int [] result = new int[101]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -220,11 +235,22 @@ private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\15\0\32\0\47\0\64\0\101\0\116\0\15"+ - "\0\15\0\133\0\150\0\165\0\202\0\217\0\101\0\234"; + "\0\0\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264\0\u02ca"+ + "\0\u0330\0\u0396\0\u03fc\0\u0462\0\u04c8\0\u052e\0\u0594\0\u05fa"+ + "\0\u0660\0\u06c6\0\u072c\0\u0792\0\u07f8\0\u085e\0\u08c4\0\u092a"+ + "\0\u0990\0\146\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264"+ + "\0\u09f6\0\u0a5c\0\u0ac2\0\u0b28\0\u0462\0\u0b8e\0\u0bf4\0\u0c5a"+ + "\0\u0cc0\0\u0d26\0\u0d8c\0\u0df2\0\u0330\0\u0396\0\u0e58\0\u0ebe"+ + "\0\u0f24\0\u0f8a\0\u0ff0\0\u1056\0\u10bc\0\u1122\0\u1188\0\u11ee"+ + "\0\u1254\0\u12ba\0\u1320\0\u1386\0\u13ec\0\u1452\0\u14b8\0\u092a"+ + "\0\u151e\0\u1584\0\u15ea\0\u1650\0\u16b6\0\u171c\0\u1782\0\u17e8"+ + 
"\0\u184e\0\u18b4\0\u191a\0\u1980\0\u19e6\0\u1a4c\0\u1ab2\0\u1b18"+ + "\0\u1b7e\0\u1be4\0\u1c4a\0\u1cb0\0\u1d16\0\u1d7c\0\u1de2\0\u1e48"+ + "\0\u1eae\0\u1f14\0\u1f7a\0\u1fe0\0\u2046\0\u20ac\0\u2112\0\u2178"+ + "\0\u21de\0\u2244\0\u22aa\0\u2310\0\u2376"; private static int [] zzUnpackRowMap() { - int [] result = new int[16]; + int [] result = new int[101]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -247,21 +273,280 @@ private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final String ZZ_TRANS_PACKED_0 = - "\1\2\1\3\1\2\1\4\1\5\3\2\1\6\2\7"+ - "\1\10\1\11\16\0\2\3\1\12\1\0\1\13\1\0"+ - "\1\13\1\14\1\0\1\3\3\0\1\3\2\4\2\0"+ - "\2\15\1\16\1\0\1\4\4\0\1\5\1\0\1\5"+ - "\3\0\1\14\1\0\1\5\3\0\1\3\1\17\1\4"+ - "\1\5\3\0\1\17\1\0\1\17\13\0\2\7\3\0"+ - "\1\3\2\12\2\0\2\20\1\14\1\0\1\12\3\0"+ - "\1\3\1\13\7\0\1\13\3\0\1\3\1\14\1\12"+ - "\1\5\3\0\1\14\1\0\1\14\4\0\1\15\1\4"+ - "\6\0\1\15\3\0\1\3\1\16\1\4\1\5\3\0"+ - "\1\16\1\0\1\16\4\0\1\20\1\12\6\0\1\20"+ - "\2\0"; + "\1\2\1\3\1\2\1\4\1\2\1\5\1\2\1\6"+ + "\1\2\1\7\1\2\1\10\3\2\1\11\5\2\1\12"+ + "\3\2\1\13\11\2\1\14\2\2\1\15\43\2\1\16"+ + "\1\2\1\17\3\2\1\20\1\21\1\2\1\22\1\2"+ + "\1\23\2\2\1\24\1\2\1\25\1\2\1\26\1\27"+ + "\3\2\1\30\2\31\1\32\1\33\150\0\1\25\11\0"+ + "\1\25\20\0\1\25\22\0\1\25\10\0\3\25\17\0"+ + "\1\25\10\0\1\25\23\0\1\25\1\0\1\25\1\0"+ + "\1\25\1\0\1\25\1\0\1\25\1\0\3\25\1\0"+ + "\5\25\1\0\3\25\1\0\11\25\1\0\2\25\1\0"+ + "\16\25\1\0\2\25\1\0\21\25\1\0\1\25\1\0"+ + "\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+ + "\1\25\16\0\1\25\3\0\1\25\5\0\2\25\3\0"+ + "\1\25\13\0\1\25\1\0\1\25\4\0\2\25\4\0"+ + "\1\25\1\0\1\25\3\0\2\25\1\0\1\25\5\0"+ + "\3\25\1\0\1\25\15\0\1\25\10\0\1\25\23\0"+ + "\1\25\3\0\1\25\1\0\1\25\1\0\1\25\1\0"+ + "\3\25\2\0\4\25\1\0\3\25\2\0\3\25\1\0"+ + "\4\25\1\0\2\25\2\0\3\25\1\0\11\25\1\0"+ + "\2\25\1\0\16\25\1\0\2\25\1\0\1\25\1\0"+ + "\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+ + "\1\25\16\0\1\25\3\0\1\25\3\0\1\25\1\0"+ + 
"\3\25\2\0\1\25\1\0\2\25\1\0\3\25\3\0"+ + "\2\25\1\0\1\25\1\0\2\25\1\0\2\25\3\0"+ + "\2\25\1\0\1\25\1\0\1\25\1\0\2\25\1\0"+ + "\2\25\1\0\2\25\1\0\5\25\1\0\5\25\1\0"+ + "\2\25\1\0\2\25\1\0\1\25\1\0\3\25\4\0"+ + "\1\25\4\0\1\25\30\0\3\25\5\0\1\25\1\0"+ + "\1\25\1\0\1\25\4\0\1\25\14\0\1\25\5\0"+ + "\1\25\11\0\2\25\12\0\1\26\1\0\2\25\12\0"+ + "\1\25\23\0\1\25\1\0\1\26\7\0\2\25\2\0"+ + "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+ + "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+ + "\4\25\1\0\5\25\1\26\1\0\1\25\1\0\1\25"+ + "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+ + "\2\0\1\25\16\0\1\25\3\0\1\25\5\0\2\25"+ + "\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+ + "\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+ + "\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+ + "\10\0\1\25\1\0\2\26\1\0\1\25\10\0\1\25"+ + "\23\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+ + "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+ + "\1\0\3\25\3\0\2\25\4\0\3\25\1\0\1\25"+ + "\10\0\1\25\1\0\2\25\20\0\1\25\11\0\2\25"+ + "\17\0\1\25\6\0\2\25\4\0\1\25\5\0\1\25"+ + "\2\0\1\25\5\0\3\25\1\0\1\25\15\0\1\25"+ + "\10\0\1\25\23\0\1\25\3\0\1\25\5\0\1\25"+ + "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+ + "\7\0\1\25\2\0\1\25\5\0\1\25\2\0\1\25"+ + "\1\0\1\25\105\0\1\33\21\0\1\27\34\0\1\32"+ + "\3\0\1\32\3\0\1\32\1\0\3\32\2\0\1\32"+ + "\2\0\1\32\1\0\3\32\3\0\2\32\1\0\1\32"+ + "\1\0\2\32\1\0\2\32\3\0\2\32\1\0\1\32"+ + "\3\0\2\32\1\0\2\32\1\0\2\32\1\0\5\32"+ + "\1\0\5\32\2\0\1\32\1\0\2\32\1\0\1\32"+ + "\1\0\3\32\4\0\1\32\4\0\1\32\16\0\1\32"+ + "\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+ + "\1\0\3\32\1\0\5\32\1\0\3\32\1\0\11\32"+ + "\1\0\2\32\1\0\16\32\1\0\2\32\1\0\21\32"+ + "\1\0\1\32\1\0\3\32\2\0\1\32\1\0\1\32"+ + "\1\0\2\32\1\0\1\32\16\0\1\32\1\0\1\32"+ + "\1\0\1\32\3\0\1\32\1\0\3\32\1\0\2\32"+ + "\1\0\2\32\1\0\3\32\1\0\11\32\1\0\2\32"+ + "\1\0\16\32\1\0\2\32\1\0\21\32\1\0\1\32"+ + "\1\0\3\32\2\0\1\32\1\0\1\32\1\0\2\32"+ + "\1\0\1\32\16\0\1\32\11\0\1\32\20\0\1\32"+ + "\33\0\1\32\21\0\1\32\10\0\1\32\23\0\1\32"+ + 
"\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+ + "\1\0\3\32\1\0\5\32\1\0\3\32\1\0\6\32"+ + "\1\0\2\32\1\0\2\32\1\0\10\32\1\0\5\32"+ + "\1\0\2\32\1\0\21\32\1\0\1\32\1\0\3\32"+ + "\2\0\1\32\1\0\1\32\1\0\2\32\1\0\1\32"+ + "\145\0\1\33\15\0\1\34\1\0\1\35\1\0\1\36"+ + "\1\0\1\37\1\0\1\40\1\0\1\41\3\0\1\42"+ + "\5\0\1\43\3\0\1\44\11\0\1\45\2\0\1\46"+ + "\16\0\1\47\2\0\1\50\41\0\2\25\1\51\1\0"+ + "\1\52\1\0\1\52\1\53\1\0\1\25\3\0\1\34"+ + "\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+ + "\1\0\1\54\3\0\1\55\5\0\1\56\3\0\1\57"+ + "\11\0\1\45\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\1\25\2\26\2\0\2\63\1\64\1\0\1\26"+ + "\15\0\1\65\15\0\1\66\14\0\1\67\16\0\1\70"+ + "\2\0\1\71\21\0\1\72\20\0\1\27\1\0\1\27"+ + "\3\0\1\53\1\0\1\27\3\0\1\34\1\0\1\35"+ + "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\73"+ + "\3\0\1\55\5\0\1\56\3\0\1\74\11\0\1\45"+ + "\2\0\1\75\16\0\1\76\2\0\1\77\21\0\1\72"+ + "\17\0\1\25\1\100\1\26\1\27\3\0\1\100\1\0"+ + "\1\100\144\0\2\31\4\0\1\25\11\0\3\25\5\0"+ + "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+ + "\1\25\1\0\2\25\4\0\1\25\5\0\1\25\3\0"+ + "\1\25\4\0\5\25\10\0\1\51\1\0\2\25\1\0"+ + "\1\25\10\0\1\25\23\0\1\25\1\0\1\51\7\0"+ + "\2\25\2\0\5\25\2\0\2\25\4\0\6\25\1\0"+ + "\2\25\4\0\5\25\1\0\5\25\1\0\2\25\1\0"+ + "\3\25\1\0\4\25\1\0\5\25\1\51\1\0\1\25"+ + "\1\0\1\25\1\0\3\25\2\0\1\25\1\0\1\25"+ + "\1\0\1\25\2\0\1\25\16\0\1\25\3\0\1\25"+ + "\5\0\2\25\3\0\1\25\4\0\3\25\4\0\1\25"+ + "\1\0\1\25\2\0\1\25\1\0\2\25\4\0\1\25"+ + "\1\0\1\25\3\0\2\25\1\0\1\25\5\0\3\25"+ + "\1\0\1\25\10\0\1\25\1\0\2\51\1\0\1\25"+ + "\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+ + "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+ + "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+ + "\1\0\1\25\1\0\3\25\1\0\1\25\1\0\2\25"+ + "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+ + "\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+ + "\5\0\3\25\1\0\1\25\5\0\3\25\5\0\1\25"+ + "\2\0\2\25\4\0\1\25\2\0\1\25\1\0\1\25"+ + "\102\0\2\25\6\0\1\25\55\0\1\25\3\0\1\25"+ + "\2\0\1\25\3\0\1\25\5\0\1\25\7\0\1\25"+ + "\4\0\2\25\3\0\2\25\1\0\1\25\4\0\1\25"+ + 
"\1\0\1\25\2\0\2\25\1\0\3\25\1\0\1\25"+ + "\2\0\4\25\2\0\1\25\40\0\1\34\1\0\1\35"+ + "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\101"+ + "\3\0\1\42\5\0\1\43\3\0\1\102\11\0\1\45"+ + "\2\0\1\103\16\0\1\104\2\0\1\105\41\0\1\25"+ + "\2\51\2\0\2\106\1\53\1\0\1\51\3\0\1\34"+ + "\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+ + "\1\0\1\107\3\0\1\110\5\0\1\111\3\0\1\112"+ + "\11\0\1\45\2\0\1\113\16\0\1\114\2\0\1\115"+ + "\41\0\1\25\1\52\7\0\1\52\3\0\1\34\1\0"+ + "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+ + "\1\116\3\0\1\42\5\0\1\43\3\0\1\117\11\0"+ + "\1\45\2\0\1\120\16\0\1\121\2\0\1\122\21\0"+ + "\1\72\17\0\1\25\1\53\1\51\1\27\3\0\1\53"+ + "\1\0\1\53\4\0\1\26\11\0\3\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\26"+ + "\1\0\2\26\4\0\1\25\5\0\1\25\3\0\1\26"+ + "\4\0\1\26\2\25\2\26\10\0\1\26\1\0\2\25"+ + "\1\0\1\26\10\0\1\25\23\0\1\25\3\0\1\25"+ + "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+ + "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+ + "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\26"+ + "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+ + "\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+ + "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+ + "\2\26\5\0\1\25\2\0\1\25\1\26\4\0\1\25"+ + "\2\0\1\25\1\0\1\25\102\0\2\26\6\0\1\26"+ + "\55\0\1\26\3\0\1\26\2\0\1\26\3\0\1\26"+ + "\5\0\1\26\7\0\1\26\4\0\2\26\3\0\2\26"+ + "\1\0\1\26\4\0\1\26\1\0\1\26\2\0\2\26"+ + "\1\0\3\26\1\0\1\26\2\0\4\26\2\0\1\26"+ + "\52\0\1\123\3\0\1\124\5\0\1\125\3\0\1\126"+ + "\14\0\1\127\16\0\1\130\2\0\1\131\42\0\1\63"+ + "\1\26\6\0\1\63\3\0\1\34\1\0\1\35\1\0"+ + "\1\36\1\0\1\37\1\0\1\40\1\0\1\132\3\0"+ + "\1\55\5\0\1\56\3\0\1\133\11\0\1\45\2\0"+ + "\1\134\16\0\1\135\2\0\1\136\21\0\1\72\17\0"+ + "\1\25\1\64\1\26\1\27\3\0\1\64\1\0\1\64"+ + "\4\0\1\27\37\0\1\27\1\0\2\27\16\0\1\27"+ + "\4\0\1\27\2\0\2\27\15\0\1\27\131\0\1\27"+ + "\152\0\2\27\11\0\1\27\114\0\2\27\6\0\1\27"+ + "\55\0\1\27\3\0\1\27\2\0\1\27\3\0\1\27"+ + "\5\0\1\27\7\0\1\27\4\0\2\27\3\0\2\27"+ + "\1\0\1\27\4\0\1\27\1\0\1\27\2\0\2\27"+ + "\1\0\3\27\1\0\1\27\2\0\4\27\2\0\1\27"+ + 
"\152\0\1\27\34\0\1\100\11\0\3\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\100"+ + "\1\0\2\100\4\0\1\25\5\0\1\25\3\0\1\100"+ + "\4\0\1\100\2\25\2\100\10\0\1\26\1\0\2\25"+ + "\1\0\1\100\10\0\1\25\23\0\1\25\3\0\1\25"+ + "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+ + "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+ + "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\100"+ + "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+ + "\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+ + "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+ + "\2\100\5\0\1\25\2\0\1\25\1\100\4\0\1\25"+ + "\2\0\1\25\1\0\1\25\102\0\2\100\6\0\1\100"+ + "\55\0\1\100\3\0\1\100\2\0\1\100\3\0\1\100"+ + "\5\0\1\100\7\0\1\100\4\0\2\100\3\0\2\100"+ + "\1\0\1\100\4\0\1\100\1\0\1\100\2\0\2\100"+ + "\1\0\3\100\1\0\1\100\2\0\4\100\2\0\1\100"+ + "\41\0\1\51\11\0\3\25\5\0\1\25\1\0\1\25"+ + "\1\0\1\25\4\0\1\25\4\0\1\51\1\0\2\51"+ + "\4\0\1\25\5\0\1\25\3\0\1\51\4\0\1\51"+ + "\2\25\2\51\10\0\1\51\1\0\2\25\1\0\1\51"+ + "\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+ + "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+ + "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+ + "\1\0\1\25\1\0\3\25\1\0\1\51\1\0\2\25"+ + "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+ + "\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+ + "\5\0\3\25\1\0\1\25\5\0\1\25\2\51\5\0"+ + "\1\25\2\0\1\25\1\51\4\0\1\25\2\0\1\25"+ + "\1\0\1\25\102\0\2\51\6\0\1\51\55\0\1\51"+ + "\3\0\1\51\2\0\1\51\3\0\1\51\5\0\1\51"+ + "\7\0\1\51\4\0\2\51\3\0\2\51\1\0\1\51"+ + "\4\0\1\51\1\0\1\51\2\0\2\51\1\0\3\51"+ + "\1\0\1\51\2\0\4\51\2\0\1\51\52\0\1\137"+ + "\3\0\1\140\5\0\1\141\3\0\1\142\14\0\1\143"+ + "\16\0\1\144\2\0\1\145\42\0\1\106\1\51\6\0"+ + "\1\106\4\0\1\52\11\0\3\25\5\0\1\25\1\0"+ + "\1\25\1\0\1\25\4\0\1\25\4\0\1\52\1\0"+ + "\2\52\4\0\1\25\5\0\1\25\3\0\1\52\4\0"+ + "\1\52\2\25\2\52\12\0\2\25\1\0\1\52\10\0"+ + "\1\25\23\0\1\25\11\0\2\25\2\0\5\25\2\0"+ + "\2\25\4\0\6\25\1\0\2\25\4\0\5\25\1\0"+ + "\5\25\1\0\2\25\1\0\3\25\1\0\4\25\1\0"+ + "\5\25\2\0\1\25\1\0\1\25\1\0\3\25\2\0"+ + "\1\25\1\0\1\25\1\0\1\25\2\0\1\25\16\0"+ + 
"\1\25\3\0\1\25\5\0\2\25\3\0\1\25\4\0"+ + "\3\25\4\0\1\25\1\0\1\25\2\0\1\25\1\0"+ + "\2\25\4\0\1\25\1\0\1\25\3\0\2\25\1\0"+ + "\1\25\5\0\3\25\1\0\1\25\10\0\1\25\4\0"+ + "\1\25\10\0\1\25\23\0\1\25\3\0\1\25\6\0"+ + "\2\25\5\0\1\25\1\0\1\25\1\0\1\25\1\0"+ + "\11\25\2\0\1\25\4\0\1\25\4\0\6\25\2\0"+ + "\1\25\1\0\1\25\1\0\3\25\1\0\1\52\1\0"+ + "\2\25\4\0\3\25\1\0\1\25\10\0\1\25\1\0"+ + "\2\25\20\0\1\25\3\0\1\25\5\0\1\25\32\0"+ + "\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\52"+ + "\5\0\1\25\2\0\1\25\1\52\4\0\1\25\2\0"+ + "\1\25\1\0\1\25\102\0\2\52\6\0\1\52\55\0"+ + "\1\52\3\0\1\52\2\0\1\52\3\0\1\52\5\0"+ + "\1\52\7\0\1\52\4\0\2\52\3\0\2\52\1\0"+ + "\1\52\4\0\1\52\1\0\1\52\2\0\2\52\1\0"+ + "\3\52\1\0\1\52\2\0\4\52\2\0\1\52\41\0"+ + "\1\53\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+ + "\1\25\4\0\1\25\4\0\1\53\1\0\2\53\4\0"+ + "\1\25\5\0\1\25\3\0\1\53\4\0\1\53\2\25"+ + "\2\53\10\0\1\51\1\0\2\25\1\0\1\53\10\0"+ + "\1\25\23\0\1\25\3\0\1\25\6\0\2\25\5\0"+ + "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+ + "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+ + "\1\25\1\0\3\25\1\0\1\53\1\0\2\25\4\0"+ + "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\20\0"+ + "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+ + "\3\25\1\0\1\25\5\0\1\25\2\53\5\0\1\25"+ + "\2\0\1\25\1\53\4\0\1\25\2\0\1\25\1\0"+ + "\1\25\102\0\2\53\6\0\1\53\55\0\1\53\3\0"+ + "\1\53\2\0\1\53\3\0\1\53\5\0\1\53\7\0"+ + "\1\53\4\0\2\53\3\0\2\53\1\0\1\53\4\0"+ + "\1\53\1\0\1\53\2\0\2\53\1\0\3\53\1\0"+ + "\1\53\2\0\4\53\2\0\1\53\41\0\1\63\37\0"+ + "\1\63\1\0\2\63\16\0\1\63\4\0\1\63\2\0"+ + "\2\63\10\0\1\26\4\0\1\63\36\0\1\26\102\0"+ + "\1\26\146\0\2\26\133\0\1\63\152\0\2\63\11\0"+ + "\1\63\114\0\2\63\6\0\1\63\55\0\1\63\3\0"+ + "\1\63\2\0\1\63\3\0\1\63\5\0\1\63\7\0"+ + "\1\63\4\0\2\63\3\0\2\63\1\0\1\63\4\0"+ + "\1\63\1\0\1\63\2\0\2\63\1\0\3\63\1\0"+ + "\1\63\2\0\4\63\2\0\1\63\41\0\1\64\11\0"+ + "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+ + "\1\25\4\0\1\64\1\0\2\64\4\0\1\25\5\0"+ + "\1\25\3\0\1\64\4\0\1\64\2\25\2\64\10\0"+ + "\1\26\1\0\2\25\1\0\1\64\10\0\1\25\23\0"+ + 
"\1\25\3\0\1\25\6\0\2\25\5\0\1\25\1\0"+ + "\1\25\1\0\1\25\1\0\11\25\2\0\1\25\4\0"+ + "\1\25\4\0\6\25\2\0\1\25\1\0\1\25\1\0"+ + "\3\25\1\0\1\64\1\0\2\25\4\0\3\25\1\0"+ + "\1\25\10\0\1\25\1\0\2\25\20\0\1\25\3\0"+ + "\1\25\5\0\1\25\32\0\15\25\5\0\3\25\1\0"+ + "\1\25\5\0\1\25\2\64\5\0\1\25\2\0\1\25"+ + "\1\64\4\0\1\25\2\0\1\25\1\0\1\25\102\0"+ + "\2\64\6\0\1\64\55\0\1\64\3\0\1\64\2\0"+ + "\1\64\3\0\1\64\5\0\1\64\7\0\1\64\4\0"+ + "\2\64\3\0\2\64\1\0\1\64\4\0\1\64\1\0"+ + "\1\64\2\0\2\64\1\0\3\64\1\0\1\64\2\0"+ + "\4\64\2\0\1\64\41\0\1\106\37\0\1\106\1\0"+ + "\2\106\16\0\1\106\4\0\1\106\2\0\2\106\10\0"+ + "\1\51\4\0\1\106\36\0\1\51\102\0\1\51\146\0"+ + "\2\51\133\0\1\106\152\0\2\106\11\0\1\106\114\0"+ + "\2\106\6\0\1\106\55\0\1\106\3\0\1\106\2\0"+ + "\1\106\3\0\1\106\5\0\1\106\7\0\1\106\4\0"+ + "\2\106\3\0\2\106\1\0\1\106\4\0\1\106\1\0"+ + "\1\106\2\0\2\106\1\0\3\106\1\0\1\106\2\0"+ + "\4\106\2\0\1\106\37\0"; private static int [] zzUnpackTrans() { - int [] result = new int[169]; + int [] result = new int[9180]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -299,11 +584,11 @@ private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\1\0\1\11\5\1\2\11\1\1\1\0\1\1\1\0"+ - "\1\1\2\0"; + "\1\0\1\11\27\1\2\11\15\0\1\1\1\0\1\1"+ + "\10\0\1\1\61\0"; private static int [] zzUnpackAttribute() { - int [] result = new int[16]; + int [] result = new int[101]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -434,7 +719,7 @@ char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 2174) { + while (i < 2640) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); @@ -713,28 +998,28 @@ zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? 
zzAction : ZZ_ACTION[zzAction]) { - case 1: - { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + case 2: + { return WORD_TYPE; } case 7: break; - case 6: - { return HIRAGANA_TYPE; + case 4: + { return SOUTH_EAST_ASIAN_TYPE; } case 8: break; - case 2: - { return WORD_TYPE; - } - case 9: break; case 5: { return IDEOGRAPHIC_TYPE; } + case 9: break; + case 1: + { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + } case 10: break; case 3: { return NUMERIC_TYPE; } case 11: break; - case 4: - { return SOUTH_EAST_ASIAN_TYPE; + case 6: + { return HIRAGANA_TYPE; } case 12: break; default: Index: modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (revision 1055272) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (working copy) @@ -15,8 +15,8 @@ */ // Generated from IANA Root Zone Database -// file version from Saturday, December 4, 2010 12:34:19 PM UTC -// generated on Sunday, December 5, 2010 12:24:12 AM UTC +// file version from Wednesday, January 5, 2011 12:34:09 PM UTC +// generated on Wednesday, January 5, 2011 6:38:32 PM UTC // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros ASCIITLD = "." 
( @@ -306,6 +306,7 @@ | [xX][nN]--[pP]1[aA][iI] | [xX][nN]--[pP][gG][bB][sS]0[dD][hH] | [xX][nN]--[wW][gG][bB][hH]1[cC] + | [xX][nN]--[wW][gG][bB][lL]6[aA] | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA] | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX] | [xX][nN]--[zZ][cC][kK][zZ][aA][hH] Index: modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (revision 0) @@ -0,0 +1,105 @@ +ALetterSupp = ( + ([\ud80d][\uDC00-\uDC2E]) + | ([\ud80c][\uDC00-\uDFFF]) + | ([\ud809][\uDC00-\uDC62]) + | ([\ud808][\uDC00-\uDF6E]) + | ([\ud81a][\uDC00-\uDE38]) + | ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF]) + | ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB]) + | ([\ud801][\uDC00-\uDC9D]) + | ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]) + | ([\ud803][\uDC00-\uDC48]) + | ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]) +) +FormatSupp = ( + ([\ud804][\uDCBD]) + | ([\ud834][\uDD73-\uDD7A]) + | ([\udb40][\uDC01\uDC20-\uDC7F]) +) +ExtendSupp = ( + ([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA]) + | 
([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]) + | ([\ud800][\uDDFD]) + | ([\udb40][\uDD00-\uDDEF]) + | ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F]) +) +NumericSupp = ( + ([\ud804][\uDC66-\uDC6F]) + | ([\ud835][\uDFCE-\uDFFF]) + | ([\ud801][\uDCA0-\uDCA9]) +) +KatakanaSupp = ( + ([\ud82c][\uDC00]) +) +MidLetterSupp = ( + [] +) +MidNumSupp = ( + [] +) +MidNumLetSupp = ( + [] +) +ExtendNumLetSupp = ( + [] +) +ExtendNumLetSupp = ( + [] +) +ComplexContextSupp = ( + [] +) +HanSupp = ( + ([\ud87e][\uDC00-\uDE1D]) + | ([\ud86b][\uDC00-\uDFFF]) + | ([\ud86a][\uDC00-\uDFFF]) + | ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF]) + | ([\ud868][\uDC00-\uDFFF]) + | ([\ud86e][\uDC00-\uDC1D]) + | ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF]) + | ([\ud86c][\uDC00-\uDFFF]) + | ([\ud863][\uDC00-\uDFFF]) + | ([\ud862][\uDC00-\uDFFF]) + | ([\ud861][\uDC00-\uDFFF]) + | ([\ud860][\uDC00-\uDFFF]) + | ([\ud867][\uDC00-\uDFFF]) + | ([\ud866][\uDC00-\uDFFF]) + | ([\ud865][\uDC00-\uDFFF]) + | ([\ud864][\uDC00-\uDFFF]) + | ([\ud858][\uDC00-\uDFFF]) + | ([\ud859][\uDC00-\uDFFF]) + | ([\ud85a][\uDC00-\uDFFF]) + | ([\ud85b][\uDC00-\uDFFF]) + | ([\ud85c][\uDC00-\uDFFF]) + | ([\ud85d][\uDC00-\uDFFF]) + | ([\ud85e][\uDC00-\uDFFF]) + | ([\ud85f][\uDC00-\uDFFF]) + | ([\ud850][\uDC00-\uDFFF]) + | ([\ud851][\uDC00-\uDFFF]) + | ([\ud852][\uDC00-\uDFFF]) + | ([\ud853][\uDC00-\uDFFF]) + | ([\ud854][\uDC00-\uDFFF]) + | ([\ud855][\uDC00-\uDFFF]) + | ([\ud856][\uDC00-\uDFFF]) + | ([\ud857][\uDC00-\uDFFF]) + | ([\ud849][\uDC00-\uDFFF]) + | ([\ud848][\uDC00-\uDFFF]) + | ([\ud84b][\uDC00-\uDFFF]) + | ([\ud84a][\uDC00-\uDFFF]) + | ([\ud84d][\uDC00-\uDFFF]) + | ([\ud84c][\uDC00-\uDFFF]) + | ([\ud84f][\uDC00-\uDFFF]) + | ([\ud84e][\uDC00-\uDFFF]) + | ([\ud841][\uDC00-\uDFFF]) + | ([\ud840][\uDC00-\uDFFF]) + | ([\ud843][\uDC00-\uDFFF]) + | ([\ud842][\uDC00-\uDFFF]) + | ([\ud845][\uDC00-\uDFFF]) + | ([\ud844][\uDC00-\uDFFF]) + | ([\ud847][\uDC00-\uDFFF]) + | 
([\ud846][\uDC00-\uDFFF]) +) +HiraganaSupp = ( + ([\ud83c][\uDE00]) + | ([\ud82c][\uDC01]) +)