Index: lucene/common-build.xml
===================================================================
--- lucene/common-build.xml (revision 991408)
+++ lucene/common-build.xml (working copy)
@@ -640,8 +640,8 @@
source="@{javac.source}"
target="@{javac.target}">
-
-
+
Index: lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 991408)
+++ lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy)
@@ -247,8 +247,8 @@
public void testCJK() throws Exception {
// Test Ideographic Space - As wide as a CJK character cell (fullwidth)
// used google to translate the word "term" to japanese -> 用語
- assertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
- assertQueryEquals("用語\u3000用語\u3000用語", null, "用語\u0020用語\u0020用語");
+ assertQueryEquals("term\u3000term\u3000term", new MockAnalyzer(), "term\u0020term\u0020term");
+ assertQueryEquals("用語\u3000用語\u3000用語", new MockAnalyzer(), "用語\u0020用語\u0020用語");
}
//individual CJK chars as terms, like StandardAnalyzer
Index: lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java (revision 991408)
+++ lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java (working copy)
@@ -37,10 +37,10 @@
*/
public static final CharacterRunAutomaton KEYWORD =
new CharacterRunAutomaton(new RegExp(".*").toAutomaton());
- /** Acts like LetterTokenizer. */
- // the ugly regex below is Unicode 5.2 [:Letter:]
+ /** Acts like LetterTokenizer, but only on Latin-1 letters.
+ * A regexp for the full Unicode [:Letter:] class is huge and slow to load. */
public static final CharacterRunAutomaton SIMPLE =
- new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬˮͰ-ʹͶͷͺ-ͽΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԥԱ-Ֆՙա-ևא-תװ-ײء-يٮٯٱ-ۓەۥۦۮۯۺ-ۼۿܐܒ-ܯݍ-ޥޱߊ-ߪߴߵߺࠀ-ࠕࠚࠤࠨऄ-हऽॐक़-ॡॱॲॹ-ॿঅ-ঌএঐও-নপ-রলশ-হঽৎড়ঢ়য়-ৡৰৱਅ-ਊਏਐਓ-ਨਪ-ਰਲਲ਼ਵਸ਼ਸਹਖ਼-ੜਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલળવ-હઽૐૠૡଅ-ଌଏଐଓ-ନପ-ରଲଳଵ-ହଽଡ଼ଢ଼ୟ-ୡୱஃஅ-ஊஎ-ஐஒ-கஙசஜஞடணதந-பம-ஹௐఅ-ఌఎ-ఐఒ-నప-ళవ-హఽౘౙౠౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽೞೠೡഅ-ഌഎ-ഐഒ-നപ-ഹഽൠൡൺ-ൿඅ-ඖක-නඳ-රලව-ෆก-ะาำเ-ๆກຂຄງຈຊຍດ-ທນ-ຟມ-ຣລວສຫອ-ະາຳຽເ-ໄໆໜໝༀཀ-ཇཉ-ཬྈ-ྋက-ဪဿၐ-ၕၚ-ၝၡၥၦၮ-ၰၵ-ႁႎႠ-Ⴥა-ჺჼᄀ-ቈቊ-ቍቐ-ቖቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗៜᠠ-ᡷᢀ-ᢨᢪᢰ-ᣵᤀ-ᤜᥐ-ᥭᥰ-ᥴᦀ-ᦫᧁ-ᧇᨀ-ᨖᨠ-ᩔᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮᮯᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱⁿₐ-ₔℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⴀ-ⴥⴰ-ⵥⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ々〆〱-〵〻〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆷㇰ-ㇿ㐀-䶵一-鿋ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪꘫꙀ-ꙟꙢ-ꙮꙿ-ꚗꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋꞌꟻ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺꪀ-ꪯꪱꪵꪶꪹ-ꪽꫀꫂꫛ-ꫝꯀ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-鶴侮-舘並-龎ff-stﬓ-ﬗיִײַ-ﬨשׁ-זּטּ-לּמּנּסּףּפּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌞𐌰-𐍀𐍂-𐍉𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐠀-𐠅𐠈𐠊-𐠵𐠷𐠸𐠼𐠿-𐡕𐤀-𐤕𐤠-𐤹𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐬀-𐬵𐭀-𐭕𐭠-𐭲𐰀-𐱈𑂃-𑂯𒀀-𒍮𓀀-𓐮𝐀-𝑔𝑖-𝒜𝒞𝒟𝒢𝒥𝒦𝒩-𝒬𝒮-𝒹𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𠀀-𪛖𪜀-𫜴丽-𪘀]+").toAutomaton());
+ new CharacterRunAutomaton(new RegExp("[a-zA-Z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]+").toAutomaton());
private final CharacterRunAutomaton runAutomaton;
private final boolean lowerCase;
Index: lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java (revision 991408)
+++ lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java (working copy)
@@ -47,7 +47,7 @@
}
// internal buffer size is 1024 make sure we have a surrogate pair right at the border
builder.insert(1023, "\ud801\udc1c");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.WHITESPACE, true);
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
}
@@ -64,7 +64,7 @@
builder.append("a");
}
builder.append("\ud801\udc1cabc");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.WHITESPACE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
}
}
@@ -78,7 +78,7 @@
for (int i = 0; i < 255; i++) {
builder.append("A");
}
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.WHITESPACE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
@@ -92,7 +92,7 @@
builder.append("A");
}
builder.append("\ud801\udc1c");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.WHITESPACE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
Index: lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java
===================================================================
--- lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java (revision 991408)
+++ lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java (working copy)
@@ -43,7 +43,7 @@
try {
java4.codePointAt(highSurrogateAt3, 4);
fail("array index out of bounds");
- } catch (ArrayIndexOutOfBoundsException e) {
+ } catch (IndexOutOfBoundsException e) {
}
CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
@@ -54,7 +54,7 @@
try {
java5.codePointAt(highSurrogateAt3, 4);
fail("array index out of bounds");
- } catch (ArrayIndexOutOfBoundsException e) {
+ } catch (IndexOutOfBoundsException e) {
}
}
@@ -69,7 +69,7 @@
try {
java4.codePointAt(highSurrogateAt3, 4);
fail("string index out of bounds");
- } catch (StringIndexOutOfBoundsException e) {
+ } catch (IndexOutOfBoundsException e) {
}
CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
@@ -80,7 +80,7 @@
try {
java5.codePointAt(highSurrogateAt3, 4);
fail("string index out of bounds");
- } catch (StringIndexOutOfBoundsException e) {
+ } catch (IndexOutOfBoundsException e) {
}
}