Index: modules/analysis/icu/src/java/overview.html
===================================================================
--- modules/analysis/icu/src/java/overview.html (revision 1074125)
+++ modules/analysis/icu/src/java/overview.html (working copy)
@@ -66,12 +66,12 @@
+
/**
* This tokenizer will work well in general for most languages.
*/
Tokenizer tokenizer = new ICUTokenizer(reader);
-
+
@@ -111,7 +111,7 @@
+
Collator collator = Collator.getInstance(new Locale("ar"));
ICUCollationKeyAnalyzer analyzer = new ICUCollationKeyAnalyzer(collator);
RAMDirectory ramDir = new RAMDirectory();
@@ -138,10 +138,10 @@
ScoreDoc[] result
= is.search(aqp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
-
+
+
Analyzer analyzer
= new ICUCollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
RAMDirectory indexStore = new RAMDirectory();
@@ -166,10 +166,10 @@
Document doc = searcher.doc(result[i].doc);
assertEquals(sortedTracerOrder[i], doc.getValues("tracer")[0]);
}
-
+
+
Collator collator = Collator.getInstance(new Locale("tr", "TR"));
collator.setStrength(Collator.PRIMARY);
Analyzer analyzer = new ICUCollationKeyAnalyzer(collator);
@@ -185,7 +185,7 @@
Query query = parser.parse("d\u0131gy"); // U+0131: dotless i
ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);
-
+
@@ -245,7 +245,7 @@
+
/**
* Normalizer2 objects are unmodifiable and immutable.
*/
@@ -254,7 +254,7 @@
* This filter will normalize to NFC.
*/
TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer, normalizer);
-
+
@@ -284,12 +284,12 @@
+
/**
* This filter will case-fold and normalize to NFKC.
*/
TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer);
-
+
@@ -311,13 +311,13 @@
+
/**
* This filter will case-fold, remove accents and other distinctions, and
* normalize to NFKC.
*/
TokenStream tokenstream = new ICUFoldingFilter(tokenizer);
-
+
@@ -341,19 +341,19 @@
+
/**
* This filter will map Traditional Chinese to Simplified Chinese
*/
TokenStream tokenstream = new ICUTransformFilter(tokenizer, Transliterator.getInstance("Traditional-Simplified"));
-
+
+
/**
* This filter will map Serbian Cyrillic to Serbian Latin according to BGN rules
*/
TokenStream tokenstream = new ICUTransformFilter(tokenizer, Transliterator.getInstance("Serbian-Latin/BGN"));
-
+
@@ -365,7 +365,7 @@
+
/**
* This filter will do NFC normalization, but will ignore any characters that
* did not exist as of Unicode 5.0. Because of the normalization stability policy
@@ -377,6 +377,6 @@
set.freeze();
FilteredNormalizer2 unicode50 = new FilteredNormalizer2(normalizer, set);
TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer, unicode50);
-
+