Index: solr/contrib/extraction/lib/icu4j-4_6.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: solr\contrib\extraction\lib\icu4j-4_6.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: solr/contrib/extraction/lib/icu4j-4_4_2.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: solr/contrib/analysis-extras/lib/icu4j-4_6.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: solr\contrib\analysis-extras\lib\icu4j-4_6.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: solr/contrib/analysis-extras/lib/icu4j-4_4_2.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: modules/analysis/icu/lib/icu4j-4_6.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: modules\analysis\icu\lib\icu4j-4_6.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: modules/analysis/icu/lib/icu4j-4_4_2.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (revision 1042050) +++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (working copy) @@ -73,6 +73,7 @@ // The ICU Collator and java.text.Collator implementations differ in their // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US. testCollationKeySort - (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD"); + (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, + "BFJHD", "ECAGI", "BJDFH", "BJDHF"); } } Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (revision 1042050) +++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (working copy) @@ -91,6 +91,7 @@ // The ICU Collator and java.text.Collator implementations differ in their // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US. testCollationKeySort - (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD"); + (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, + "BFJHD", "ECAGI", "BJDFH", "BJDHF"); } } Index: modules/analysis/icu/src/java/overview.html =================================================================== --- modules/analysis/icu/src/java/overview.html (revision 1042050) +++ modules/analysis/icu/src/java/overview.html (working copy) @@ -358,7 +358,7 @@

Backwards Compatibility

This module exists to provide up-to-date Unicode functionality that supports -the most recent version of Unicode (currently 5.2). However, some users who wish +the most recent version of Unicode (currently 6.0). However, some users who wish for stronger backwards compatibility can restrict {@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to operate on only a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}. Index: modules/analysis/icu/src/data/utr30/nfkc.txt =================================================================== --- modules/analysis/icu/src/data/utr30/nfkc.txt (revision 1042050) +++ modules/analysis/icu/src/data/utr30/nfkc.txt (working copy) @@ -26,7 +26,7 @@ # # file name: nfkc.txt # -# machine-generated on: 2009-11-30 +# machine-generated on: 2010-07-23 # # Round-trip mappings (=) containing [:Diacritic:] (see DiacriticFolding.txt) # were changed to one-way decompositions, as their non-starters now decompose. @@ -114,6 +114,7 @@ 0657..065B:230 065C:220 065D..065E:230 +065F:220 0670:35 06D6..06DC:230 06DF..06E2:230 @@ -150,6 +151,7 @@ 081B..0823:230 0825..0827:230 0829..082D:230 +0859..085B:220 093C:7 094D:9 0951:230 @@ -192,7 +194,7 @@ 1037:7 1039..103A:9 108D:220 -135F:230 +135D..135F:230 1714:9 1734:9 17D2:9 @@ -212,6 +214,8 @@ 1B6C:220 1B6D..1B73:230 1BAA:9 +1BE6:7 +1BF2..1BF3:9 1C37:7 1CD0..1CD2:230 1CD4:1 @@ -231,6 +235,7 @@ 1DCF:220 1DD0:202 1DD1..1DE6:230 +1DFC:233 1DFD:220 1DFE:230 1DFF:220 @@ -248,6 +253,7 @@ 20EC..20EF:220 20F0:230 2CEF..2CF1:230 +2D7F:9 2DE0..2DFF:230 302A:218 302B:228 @@ -281,6 +287,7 @@ 10A39:1 10A3A:220 10A3F:9 +11046:9 110B9:9 110BA:7 1D165..1D166:216 @@ -1426,6 +1433,14 @@ 2092>006F 2093>0078 2094>0259 +2095>0068 +2096>006B +2097>006C +2098>006D +2099>006E +209A>0070 +209B>0073 +209C>0074 20A8>0052 0073 2100>0061 002F 0063 2101>0061 002F 0073 @@ -5213,18 +5228,42 @@ 1F12C>0052 1F12D>0043 0044 1F12E>0057 005A +1F130>0041 1F131>0042 +1F132>0043 +1F133>0044 +1F134>0045 +1F135>0046 +1F136>0047 +1F137>0048 +1F138>0049 +1F139>004A +1F13A>004B +1F13B>004C +1F13C>004D 1F13D>004E +1F13E>004F 1F13F>0050 +1F140>0051 +1F141>0052 1F142>0053 +1F143>0054 +1F144>0055 +1F145>0056 1F146>0057 +1F147>0058 +1F148>0059 +1F149>005A 1F14A>0048 0056 1F14B>004D 0056 1F14C>0053 0044 1F14D>0053 0053 1F14E>0050 0050 0056 +1F14F>0057 0043 1F190>0044 004A 1F200>307B 304B +1F201>30B3 30B3 +1F202>30B5 1F210>624B 1F211>5B57 1F212>53CC @@ -5259,6 +5298,15 @@ 1F22F>6307 1F230>8D70 1F231>6253 +1F232>7981 +1F233>7A7A +1F234>5408 +1F235>6E80 +1F236>6709 +1F237>6708 +1F238>7533 +1F239>5272 +1F23A>55B6 1F240>3014 672C 3015 1F241>3014 4E09 3015 1F242>3014 4E8C 3015 @@ -5268,6 +5316,8 @@ 1F246>3014 76D7 3015 1F247>3014 52DD 3015 1F248>3014 6557 3015 +1F250>5F97 +1F251>53EF 2F800>4E3D 2F801>4E38 2F802>4E41 Index: modules/analysis/icu/src/data/utr30/nfkc_cf.txt =================================================================== --- modules/analysis/icu/src/data/utr30/nfkc_cf.txt (revision 1042050) +++ modules/analysis/icu/src/data/utr30/nfkc_cf.txt (working copy) @@ -19,11 +19,11 @@ # remains attached. # # Extracted from: -# DerivedNormalizationProps-5.2.0.txt -# Date: 2009-08-26, 18:18:50 GMT [MD] +# DerivedNormalizationProps-6.0.0.txt +# Date: 2010-05-20, 15:14:12 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2009 Unicode, Inc. +# Copyright (c) 1991-2010 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -43,7 +43,7 @@ # WARNING: Application to STRINGS must apply NFC after mapping each character, because characters may interact. # For more information, see [http://www.unicode.org/reports/tr44/] # Omitted code points are unchanged by this mapping. -# @missing: 0000..10FFFF> +# @missing: 0000..10FFFF; NFKC_CF; # All code points not explicitly listed for NFKC_Casefold # have the value . @@ -531,6 +531,7 @@ 0520>0521 0522>0523 0524>0525 +0526>0527 0531>0561 0532>0562 0533>0563 @@ -1097,6 +1098,14 @@ 2092>006F 2093>0078 2094>0259 +2095>0068 +2096>006B +2097>006C +2098>006D +2099>006E +209A>0070 +209B>0073 +209C>0074 20A8>0072 0073 2100>0061 002F 0063 2101>0061 002F 0073 @@ -2312,6 +2321,7 @@ A65A>A65B A65C>A65D A65E>A65F +A660>A661 A662>A663 A664>A665 A666>A667 @@ -2378,6 +2388,13 @@ A784>A785 A786>A787 A78B>A78C +A78D>0265 +A790>A791 +A7A0>A7A1 +A7A2>A7A3 +A7A4>A7A5 +A7A6>A7A7 +A7A8>A7A9 F900>8C48 F901>66F4 F902>8ECA @@ -4798,18 +4815,42 @@ 1F12C>0072 1F12D>0063 0064 1F12E>0077 007A +1F130>0061 1F131>0062 +1F132>0063 +1F133>0064 +1F134>0065 +1F135>0066 +1F136>0067 +1F137>0068 +1F138>0069 +1F139>006A +1F13A>006B +1F13B>006C +1F13C>006D 1F13D>006E +1F13E>006F 1F13F>0070 +1F140>0071 +1F141>0072 1F142>0073 +1F143>0074 +1F144>0075 +1F145>0076 1F146>0077 +1F147>0078 +1F148>0079 +1F149>007A 1F14A>0068 0076 1F14B>006D 0076 1F14C>0073 0064 1F14D>0073 0073 1F14E>0070 0070 0076 +1F14F>0077 0063 1F190>0064 006A 1F200>307B 304B +1F201>30B3 30B3 +1F202>30B5 1F210>624B 1F211>5B57 1F212>53CC @@ -4844,6 +4885,15 @@ 1F22F>6307 1F230>8D70 1F231>6253 +1F232>7981 +1F233>7A7A +1F234>5408 +1F235>6E80 +1F236>6709 +1F237>6708 +1F238>7533 +1F239>5272 +1F23A>55B6 1F240>3014 672C 3015 1F241>3014 4E09 3015 1F242>3014 4E8C 3015 @@ -4853,6 +4903,8 @@ 1F246>3014 76D7 3015 1F247>3014 52DD 3015 1F248>3014 6557 3015 +1F250>5F97 +1F251>53EF 2F800>4E3D 2F801>4E38 2F802>4E41 @@ -5393,4 +5445,4 @@ E0100..E01EF> E01F0..E0FFF> -# Total code points: 9740 +# Total code points: 9792 Index: modules/analysis/icu/src/data/utr30/NativeDigitFolding.txt =================================================================== --- modules/analysis/icu/src/data/utr30/NativeDigitFolding.txt (revision 1042050) +++ modules/analysis/icu/src/data/utr30/NativeDigitFolding.txt (working copy) @@ -403,6 +403,17 @@ 104A7>0037 104A8>0038 104A9>0039 +# Brahmi +11066>0030 +11067>0031 +11068>0032 +11069>0033 +1106A>0034 +1106B>0035 +1106C>0036 +1106D>0037 +1106E>0038 +1106F>0039 # Mathematical Alphanumeric Symbols - Bold digits 1D7CE>0030 1D7CF>0031 Index: modules/analysis/icu/src/data/utr30/DiacriticFolding.txt =================================================================== --- modules/analysis/icu/src/data/utr30/DiacriticFolding.txt (revision 1042050) +++ modules/analysis/icu/src/data/utr30/DiacriticFolding.txt (working copy) @@ -531,7 +531,7 @@ 06E7..06E9> 06ED> 0653..0656> -0659..065E> +0659..065F> 0670> 0711> 07FA> @@ -550,6 +550,7 @@ A670..A672> A802> 10A3F> +11046> 1D165..1D166> 1D242..1D244> Index: modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (revision 1042050) +++ modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (working copy) @@ -167,11 +167,16 @@ // Copied (and slightly modified) from // org.apache.lucene.search.TestSort.testInternationalSort() // + // TODO: this test is really fragile. there are already 3 different cases, + // depending upon unicode version. public void testCollationKeySort(Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer, - String usResult) throws Exception { + String usResult, + String frResult, + String svResult, + String dkResult) throws Exception { RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); @@ -220,13 +225,13 @@ assertMatches(searcher, queryY, sort, usResult); sort.setSort(new SortField("France", SortField.STRING)); - assertMatches(searcher, queryX, sort, "EACGI"); + assertMatches(searcher, queryX, sort, frResult); sort.setSort(new SortField("Sweden", SortField.STRING)); - assertMatches(searcher, queryY, sort, "BJDFH"); + assertMatches(searcher, queryY, sort, svResult); sort.setSort(new SortField("Denmark", SortField.STRING)); - assertMatches(searcher, queryY, sort, "BJDHF"); + assertMatches(searcher, queryY, sort, dkResult); } // Make sure the documents returned by the search match the expected list Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (revision 1042050) +++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (working copy) @@ -76,6 +76,7 @@ // The ICU Collator and Sun java.text.Collator implementations differ in their // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US. testCollationKeySort - (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH"); + (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, + oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF"); } } Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (revision 1042050) +++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (working copy) @@ -94,6 +94,7 @@ // The ICU Collator and Sun java.text.Collator implementations differ in their // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US. testCollationKeySort - (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH"); + (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, + oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF"); } } Index: modules/analysis/README.txt =================================================================== --- modules/analysis/README.txt (revision 1042050) +++ modules/analysis/README.txt (working copy) @@ -20,7 +20,7 @@ lucene-analyzers-icu-XX.jar An add-on analysis library that provides improved Unicode support via International Components for Unicode (ICU). Note: this module depends on - the ICU4j jar file (version >= 4.4.0) + the ICU4j jar file (version >= 4.6.0) lucene-analyzers-phonetic-XX.jar An add-on analysis library that provides phonetic encoders via Apache Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 1042050) +++ lucene/contrib/CHANGES.txt (working copy) @@ -279,7 +279,7 @@ dependency management between contribs by a new ANT macro. (Uwe Schindler, Shai Erera) - * LUCENE-2399, LUCENE-2683: Upgrade contrib/icu's ICU jar file to ICU 4.4.2 + * LUCENE-2797: Upgrade contrib/icu's ICU jar file to ICU 4.6 (Robert Muir) Optimizations