Index: src/site/src/documentation/content/xdocs/site.xml
===================================================================
--- src/site/src/documentation/content/xdocs/site.xml	(revision 755833)
+++ src/site/src/documentation/content/xdocs/site.xml	(working copy)
@@ -53,6 +53,7 @@
 		    <javadoc-contrib-bdb label="Bdb" href="ext:javadocs-contrib-bdb"/>
 		    <javadoc-contrib-bdb-je label="Bdb-je" href="ext:javadocs-contrib-bdb-je"/>
 		    <javadoc-contrib-benchmark label="Benchmark" href="ext:javadocs-contrib-benchmark"/>
+		    <javadoc-contrib-collation label="Collation" href="ext:javadocs-contrib-collation"/>
 		    <javadoc-contrib-highlighter label="Highlighter" href="ext:javadocs-contrib-highlighter"/>
 		    <javadoc-contrib-instantiated label="Instantiated" href="ext:javadocs-contrib-instantiated"/>
 		    <javadoc-contrib-lucli label="Lucli" href="ext:javadocs-contrib-lucli"/>
@@ -100,6 +101,7 @@
 	<javadocs-contrib-bdb href="api/contrib-bdb/index.html"/>
 	<javadocs-contrib-bdb-je href="api/contrib-bdb-je/index.html"/>
 	<javadocs-contrib-benchmark href="api/contrib-benchmark/index.html"/>
+	<javadocs-contrib-collation href="api/contrib-collation/index.html"/>
 	<javadocs-contrib-highlighter href="api/contrib-highlighter/index.html"/>
 	<javadocs-contrib-instantiated href="api/contrib-instantiated/index.html"/>
 	<javadocs-contrib-lucli href="api/contrib-lucli/index.html"/>
Index: build.xml
===================================================================
--- build.xml	(revision 755833)
+++ build.xml	(working copy)
@@ -288,6 +288,7 @@
           <packageset dir="contrib/analyzers/src/java"/>
           <packageset dir="contrib/ant/src/java"/>
           <packageset dir="contrib/benchmark/src/java"/>
+          <packageset dir="contrib/collation/src/java"/>
           <packageset dir="contrib/db/bdb-je/src/java"/>
           <packageset dir="contrib/db/bdb/src/java"/>
           <packageset dir="contrib/highlighter/src/java"/>
@@ -318,6 +319,7 @@
           <group title="contrib: Analysis" packages="org.apache.lucene.analysis.*"/>
           <group title="contrib: Ant" packages="org.apache.lucene.ant*"/>
           <group title="contrib: Benchmark" packages="org.apache.lucene.benchmark*"/>
+          <group title="contrib: Collation" packages="org.apache.lucene.collation*"/>
           <group title="contrib: DB" packages="org.apache.lucene.store.db*:org.apache.lucene.store.je*:com.sleepycat*"/>
           <group title="contrib: Highlighter" packages="org.apache.lucene.search.highlight*"/>
           <group title="contrib: Instantiated" packages="org.apache.lucene.store.instantiated*"/>
Index: contrib/collation/pom.xml.template
===================================================================
--- contrib/collation/pom.xml.template	(revision 0)
+++ contrib/collation/pom.xml.template	(revision 0)
@@ -0,0 +1,47 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+    http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+  -->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-contrib</artifactId>
+    <version>@version@</version>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-collation</artifactId>
+  <!-- Kept on one line: inside a text element the surrounding line breaks
+       and indentation would otherwise become part of the value. -->
+  <name>Lucene CollationKeyFilter/Analyzer &amp; ICUCollationKeyFilter/Analyzer</name>
+  <version>@version@</version>
+  <description>    
+    CollationKeyFilter, ICUCollationKeyFilter, CollationKeyAnalyzer, and
+    ICUCollationKeyAnalyzer - converts tokens into indexable collation keys
+  </description>
+  <packaging>jar</packaging>
+  <dependencies>
+    <dependency>
+      <groupId>com.ibm.icu</groupId>
+      <artifactId>icu4j</artifactId>
+      <version>${icu-version}</version>
+    </dependency>
+  </dependencies>
+</project>
Index: contrib/collation/lib/ICU-LICENSE.txt
===================================================================
--- contrib/collation/lib/ICU-LICENSE.txt	(revision 0)
+++ contrib/collation/lib/ICU-LICENSE.txt	(revision 0)
@@ -0,0 +1,33 @@
+ICU License - ICU 1.8.1 and later
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2008 International Business Machines Corporation and others
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, and/or sell copies of the
+Software, and to permit persons to whom the Software is furnished to do so,
+provided that the above copyright notice(s) and this permission notice appear
+in all copies of the Software and that both the above copyright notice(s) and
+this permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
+LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall not
+be used in advertising or otherwise to promote the sale, use or other
+dealings in this Software without prior written authorization of the
+copyright holder.
+
+All trademarks and registered trademarks mentioned herein are the property of
+their respective owners.
Index: contrib/collation/src/test/org/apache/lucene/collation/CollationTestBase.java
===================================================================
--- contrib/collation/src/test/org/apache/lucene/collation/CollationTestBase.java	(revision 0)
+++ contrib/collation/src/test/org/apache/lucene/collation/CollationTestBase.java	(revision 0)
@@ -0,0 +1,310 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RangeFilter;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.RangeQuery;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.util.IndexableBinaryStringTools;
+import org.apache.lucene.queryParser.analyzing.AnalyzingQueryParser;
+
+import java.text.Collator;
+import java.util.Locale;
+import java.io.IOException;
+import java.nio.CharBuffer;
+import java.nio.ByteBuffer;
+
+
+public class CollationTestBase extends TestCase {
+
+  protected String firstRangeBeginningOriginal = "\u062F";
+  protected String firstRangeEndOriginal = "\u0698";
+  
+  protected String secondRangeBeginningOriginal = "\u0633";
+  protected String secondRangeEndOriginal = "\u0638";
+  
+  /**
+   * Convenience method to perform the same function as CollationKeyFilter.
+   *  
+   * @param keyBits the result from 
+   *  collator.getCollationKey(original).toByteArray()
+   * @return The encoded collation key for the original String
+   */
+  protected String encodeCollationKey(byte[] keyBits) {
+    // View the raw collation key bytes through a ByteBuffer for the encoder
+    ByteBuffer rawKey = ByteBuffer.wrap(keyBits);
+    // Size the destination char[] as reported by getEncodedLength() so that
+    // it is large enough to hold the encoded Binary String
+    int encodedLength = IndexableBinaryStringTools.getEncodedLength(rawKey);
+    char[] encodedChars = new char[encodedLength];
+    IndexableBinaryStringTools.encode(rawKey, CharBuffer.wrap(encodedChars));
+    return new String(encodedChars);
+  }
+  
+  public void testFarsiQueryParserCollating(Analyzer analyzer) throws Exception {
+    // Index a single document in memory; its "content" field is analyzed
+    // with the supplied analyzer.
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true,
+                                              IndexWriter.MaxFieldLength.LIMITED);
+    Document document = new Document();
+    document.add(new Field("content", "\u0633\u0627\u0628",
+                           Field.Store.YES, Field.Index.ANALYZED));
+    indexWriter.addDocument(document);
+    indexWriter.close();
+    IndexSearcher searcher = new IndexSearcher(directory);
+
+    // Queries are parsed against "content" with that same analyzer.
+    AnalyzingQueryParser parser = new AnalyzingQueryParser("content", analyzer);
+    parser.setLowercaseExpandedTerms(false);
+
+    // In Unicode order U+0633 falls inside [ U+062F - U+0698 ]; Farsi,
+    // however, sorts U+0698 before U+0633.  With a Farsi Collator (or an
+    // Arabic one where Farsi is not supported) the single indexed Term must
+    // therefore NOT match the first range, but must match the second one.
+    // Pass 1: ConstantScoreRangeQuery
+    parser.setUseOldRangeQuery(false);
+    Query firstRange = parser.parse("[ \u062F TO \u0698 ]");
+    ScoreDoc[] hits = searcher.search(firstRange, null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, hits.length);
+    Query secondRange = parser.parse("[ \u0633 TO \u0638 ]");
+    hits = searcher.search(secondRange, null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, hits.length);
+
+    // Pass 2: RangeQuery
+    parser.setUseOldRangeQuery(true);
+    firstRange = parser.parse("[ \u062F TO \u0698 ]");
+    hits = searcher.search(firstRange, null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, hits.length);
+    secondRange = parser.parse("[ \u0633 TO \u0638 ]");
+    hits = searcher.search(secondRange, null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, hits.length);
+    searcher.close();
+  }
+  
+  
+  public void testFarsiRangeFilterCollating(Analyzer analyzer, String firstBeg,
+                                            String firstEnd, String secondBeg,
+                                            String secondEnd) throws Exception {
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true,
+                                              IndexWriter.MaxFieldLength.LIMITED);
+    Document document = new Document();
+    document.add(new Field("content", "\u0633\u0627\u0628",
+                           Field.Store.YES, Field.Index.ANALYZED));
+    // "body" is NOT_ANALYZED, so the TermQuery below can match it literally
+    document.add(new Field("body", "body",
+                           Field.Store.YES, Field.Index.NOT_ANALYZED));
+    indexWriter.addDocument(document);
+    indexWriter.close();
+    IndexSearcher indexSearcher = new IndexSearcher(directory);
+    Query matchBody = new TermQuery(new Term("body", "body"));
+
+    // Farsi sorts U+0698 before U+0633, although Unicode order would put
+    // U+0633 inside [ U+062F - U+0698 ].  Under a Farsi Collator (or an Arabic
+    // one where Farsi is not supported) a RangeFilter over the first range
+    // must therefore filter out the single indexed Term.
+    ScoreDoc[] hits = indexSearcher.search
+      (matchBody, new RangeFilter("content", firstBeg, firstEnd, true, true), 1).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, hits.length);
+
+    hits = indexSearcher.search
+      (matchBody, new RangeFilter("content", secondBeg, secondEnd, true, true), 1).scoreDocs;
+    assertEquals("The index Term should be included.", 1, hits.length);
+
+    indexSearcher.close();
+  }
+ 
+  public void testFarsiRangeQueryCollating(Analyzer analyzer, String firstBeg,
+                                            String firstEnd, String secondBeg,
+                                            String secondEnd) throws Exception {
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true,
+                                              IndexWriter.MaxFieldLength.LIMITED);
+    Document document = new Document();
+    document.add(new Field("content", "\u0633\u0627\u0628",
+                           Field.Store.YES, Field.Index.ANALYZED));
+    indexWriter.addDocument(document);
+    indexWriter.close();
+    IndexSearcher indexSearcher = new IndexSearcher(directory);
+
+    // Unicode order places U+0633 inside [ U+062F - U+0698 ], but Farsi sorts
+    // U+0698 before U+0633: with a Farsi Collator (or an Arabic one if Farsi
+    // is not supported) a RangeQuery over the first range must miss the Term.
+    Term lower = new Term("content", firstBeg);
+    Term upper = new Term("content", firstEnd);
+    Query range = new RangeQuery(lower, upper, true);
+    ScoreDoc[] found = indexSearcher.search(range, null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, found.length);
+
+    lower = new Term("content", secondBeg);
+    upper = new Term("content", secondEnd);
+    range = new RangeQuery(lower, upper, true);
+    found = indexSearcher.search(range, null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, found.length);
+    indexSearcher.close();
+  }
+
+  public void testFarsiConstantScoreRangeQuery
+    (Analyzer analyzer, String firstBeg, String firstEnd, 
+     String secondBeg, String secondEnd) throws Exception {
+    RAMDirectory farsiIndex = new RAMDirectory();
+    IndexWriter writer = new IndexWriter
+      (farsiIndex, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field("content", "\u0633\u0627\u0628", 
+                      Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(new Field("body", "body",
+                      Field.Store.YES, Field.Index.NOT_ANALYZED));
+    writer.addDocument(doc);
+    writer.close();
+
+    // An IndexSearcher built on a caller-supplied IndexReader does not close
+    // that reader, so this method closes the reader itself when it is done.
+    IndexReader reader = IndexReader.open(farsiIndex);
+    IndexSearcher search = new IndexSearcher(reader);
+
+    // Unicode order places U+0633 inside [ U+062F - U+0698 ], but Farsi sorts
+    // U+0698 before U+0633: with a Farsi Collator (or an Arabic one if Farsi is
+    // not supported) a ConstantScoreRangeQuery on range one must miss the Term.
+    Query csrq 
+      = new ConstantScoreRangeQuery("content", firstBeg, firstEnd, true, true);
+    ScoreDoc[] result = search.search(csrq, null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, result.length);
+
+    csrq = new ConstantScoreRangeQuery
+      ("content", secondBeg, secondEnd, true, true);
+    result = search.search(csrq, null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, result.length);
+    search.close();
+    reader.close();
+  }
+  
+  // Test using various international locales with accented characters (which
+  // sort differently depending on locale)
+  //
+  // Copied (and slightly modified) from 
+  // org.apache.lucene.search.TestSort.testInternationalSort()
+  //  
+  public void testCollationKeySort(Analyzer usAnalyzer,
+                                   Analyzer franceAnalyzer,
+                                   Analyzer swedenAnalyzer,
+                                   Analyzer denmarkAnalyzer,
+                                   String usResult) throws Exception {
+    RAMDirectory indexStore = new RAMDirectory();
+    PerFieldAnalyzerWrapper analyzer
+      = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
+    analyzer.addAnalyzer("US", usAnalyzer);
+    analyzer.addAnalyzer("France", franceAnalyzer);
+    analyzer.addAnalyzer("Sweden", swedenAnalyzer);
+    analyzer.addAnalyzer("Denmark", denmarkAnalyzer);
+    IndexWriter writer = new IndexWriter
+      (indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+
+    // document data:
+    // the tracer field is used to determine which document was hit
+    String[][] sortData = new String[][] {
+      // tracer contents US                 France             Sweden (sv_SE)     Denmark (da_DK)
+      {  "A",   "x",     "p\u00EAche",      "p\u00EAche",      "p\u00EAche",      "p\u00EAche"      },
+      {  "B",   "y",     "HAT",             "HAT",             "HAT",             "HAT"             },
+      {  "C",   "x",     "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" },
+      {  "D",   "y",     "HUT",             "HUT",             "HUT",             "HUT"             },
+      {  "E",   "x",     "peach",           "peach",           "peach",           "peach"           },
+      {  "F",   "y",     "H\u00C5T",        "H\u00C5T",        "H\u00C5T",        "H\u00C5T"        },
+      {  "G",   "x",     "sin",             "sin",             "sin",             "sin"             },
+      {  "H",   "y",     "H\u00D8T",        "H\u00D8T",        "H\u00D8T",        "H\u00D8T"        },
+      {  "I",   "x",     "s\u00EDn",        "s\u00EDn",        "s\u00EDn",        "s\u00EDn"        },
+      {  "J",   "y",     "HOT",             "HOT",             "HOT",             "HOT"             },
+    };
+
+    for (int i = 0 ; i < sortData.length ; ++i) {
+      Document doc = new Document();
+      doc.add(new Field("tracer", sortData[i][0],
+                        Field.Store.YES, Field.Index.NO));
+      doc.add(new Field("contents", sortData[i][1],
+                        Field.Store.NO, Field.Index.ANALYZED));
+      if (sortData[i][2] != null)
+        doc.add(new Field("US", sortData[i][2],
+                          Field.Store.NO, Field.Index.ANALYZED));
+      if (sortData[i][3] != null)
+        doc.add(new Field("France", sortData[i][3],
+                          Field.Store.NO, Field.Index.ANALYZED));
+      if (sortData[i][4] != null)
+        doc.add(new Field("Sweden", sortData[i][4],
+                          Field.Store.NO, Field.Index.ANALYZED));
+      if (sortData[i][5] != null)
+        doc.add(new Field("Denmark", sortData[i][5],
+                          Field.Store.NO, Field.Index.ANALYZED));
+      writer.addDocument(doc);
+    }
+    writer.optimize();
+    writer.close();
+    Searcher searcher = new IndexSearcher(indexStore);
+
+    Sort sort = new Sort();
+    Query queryX = new TermQuery(new Term ("contents", "x"));
+    Query queryY = new TermQuery(new Term ("contents", "y"));
+    // The expected US order is passed in by the caller; the rest are fixed.
+    sort.setSort(new SortField("US", SortField.STRING));
+    assertMatches(searcher, queryY, sort, usResult);
+    sort.setSort(new SortField("France", SortField.STRING));
+    assertMatches(searcher, queryX, sort, "EACGI");
+    sort.setSort(new SortField("Sweden", SortField.STRING));
+    assertMatches(searcher, queryY, sort, "BJDFH");
+    sort.setSort(new SortField("Denmark", SortField.STRING));
+    assertMatches(searcher, queryY, sort, "BJDHF");
+
+    // All orderings verified: release the searcher.
+    searcher.close();
+  }
+    
+  // Make sure the documents returned by the search match the expected list
+  // Copied from TestSort.java
+  private void assertMatches(Searcher searcher, Query query, Sort sort,
+                             String expectedResult) throws IOException {
+    // Concatenate the stored "tracer" values of the hits, in result order,
+    // and compare the resulting string with the expected sequence.
+    ScoreDoc[] hits = searcher.search(query, null, 1000, sort).scoreDocs;
+    StringBuffer actual = new StringBuffer(10);
+    for (int hit = 0; hit < hits.length; hit++) {
+      String[] tracers = searcher.doc(hits[hit].doc).getValues("tracer");
+      for (int t = 0; t < tracers.length; t++) {
+        actual.append(tracers[t]);
+      }
+    }
+    assertEquals(expectedResult, actual.toString());
+  }
+}
Index: contrib/collation/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
===================================================================
--- contrib/collation/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java	(revision 0)
+++ contrib/collation/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java	(revision 0)
@@ -0,0 +1,82 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.Analyzer;
+
+import java.text.Collator;
+import java.util.Locale;
+
+
+public class TestCollationKeyAnalyzer extends CollationTestBase {
+
+  // RuleBasedCollator offers no Farsi Locale collation in Java 1.4.2 or
+  // 1.5.0; the Arabic Locale, however, appears to sort the Farsi characters
+  // correctly, so it is used as a stand-in.
+  private Collator arabicCollator = Collator.getInstance(new Locale("ar"));
+  private Analyzer arabicAnalyzer = new CollationKeyAnalyzer(arabicCollator);
+
+  // The base class's original range endpoints, converted to encoded
+  // collation keys under arabicCollator
+  private String firstLower = toEncodedKey(firstRangeBeginningOriginal);
+  private String firstUpper = toEncodedKey(firstRangeEndOriginal);
+  private String secondLower = toEncodedKey(secondRangeBeginningOriginal);
+  private String secondUpper = toEncodedKey(secondRangeEndOriginal);
+
+  // Collation key of the given text under arabicCollator, run through
+  // encodeCollationKey()
+  private String toEncodedKey(String original) {
+    byte[] keyBits = arabicCollator.getCollationKey(original).toByteArray();
+    return encodeCollationKey(keyBits);
+  }
+
+  public void testFarsiQueryParserCollating() throws Exception {
+    testFarsiQueryParserCollating(arabicAnalyzer);
+  }
+
+  public void testFarsiRangeFilterCollating() throws Exception {
+    testFarsiRangeFilterCollating(arabicAnalyzer, firstLower, firstUpper,
+                                  secondLower, secondUpper);
+  }
+
+  public void testFarsiRangeQueryCollating() throws Exception {
+    testFarsiRangeQueryCollating(arabicAnalyzer, firstLower, firstUpper,
+                                 secondLower, secondUpper);
+  }
+
+  public void testFarsiConstantScoreRangeQuery() throws Exception {
+    testFarsiConstantScoreRangeQuery(arabicAnalyzer, firstLower, firstUpper,
+                                     secondLower, secondUpper);
+  }
+
+  public void testCollationKeySort() throws Exception {
+    Collator us = Collator.getInstance(Locale.US);
+    Collator france = Collator.getInstance(Locale.FRANCE);
+    Collator sweden = Collator.getInstance(new Locale("sv", "se"));
+    Collator denmark = Collator.getInstance(new Locale("da", "dk"));
+
+    // java.text.Collator and the ICU Collator order these terms differently;
+    // for Locale.US, java.text.Collator yields "BFJDH".
+    testCollationKeySort(new CollationKeyAnalyzer(us),
+                         new CollationKeyAnalyzer(france),
+                         new CollationKeyAnalyzer(sweden),
+                         new CollationKeyAnalyzer(denmark),
+                         "BFJDH");
+  }
+}
Index: contrib/collation/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
===================================================================
--- contrib/collation/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java	(revision 0)
+++ contrib/collation/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java	(revision 0)
@@ -0,0 +1,99 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+
+import java.text.Collator;
+import java.util.Locale;
+import java.io.Reader;
+
+
+public class TestCollationKeyFilter extends CollationTestBase {
+
+  // RuleBasedCollator offers no Farsi Locale collation in Java 1.4.2 or
+  // 1.5.0; the Arabic Locale, however, appears to sort the Farsi characters
+  // correctly, so it is used as a stand-in.
+  private Collator arabicCollator = Collator.getInstance(new Locale("ar"));
+  private Analyzer arabicAnalyzer = new TestAnalyzer(arabicCollator);
+
+  // The base class's original range endpoints, converted to encoded
+  // collation keys under arabicCollator
+  private String firstLower = toEncodedKey(firstRangeBeginningOriginal);
+  private String firstUpper = toEncodedKey(firstRangeEndOriginal);
+  private String secondLower = toEncodedKey(secondRangeBeginningOriginal);
+  private String secondUpper = toEncodedKey(secondRangeEndOriginal);
+
+  // Collation key of the given text under arabicCollator, run through
+  // encodeCollationKey()
+  private String toEncodedKey(String original) {
+    byte[] keyBits = arabicCollator.getCollationKey(original).toByteArray();
+    return encodeCollationKey(keyBits);
+  }
+
+  // Analyzer under test: chains a KeywordTokenizer into a CollationKeyFilter
+  // built on the Collator given at construction time.
+  public class TestAnalyzer extends Analyzer {
+    private Collator collator;
+
+    TestAnalyzer(Collator collator) {
+      this.collator = collator;
+    }
+
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new CollationKeyFilter(new KeywordTokenizer(reader), collator);
+    }
+  }
+
+  public void testFarsiQueryParserCollating() throws Exception {
+    testFarsiQueryParserCollating(arabicAnalyzer);
+  }
+
+  public void testFarsiRangeFilterCollating() throws Exception {
+    testFarsiRangeFilterCollating(arabicAnalyzer, firstLower, firstUpper,
+                                  secondLower, secondUpper);
+  }
+
+  public void testFarsiRangeQueryCollating() throws Exception {
+    testFarsiRangeQueryCollating(arabicAnalyzer, firstLower, firstUpper,
+                                 secondLower, secondUpper);
+  }
+
+  public void testFarsiConstantScoreRangeQuery() throws Exception {
+    testFarsiConstantScoreRangeQuery(arabicAnalyzer, firstLower, firstUpper,
+                                     secondLower, secondUpper);
+  }
+
+  public void testCollationKeySort() throws Exception {
+    Collator us = Collator.getInstance(Locale.US);
+    Collator france = Collator.getInstance(Locale.FRANCE);
+    Collator sweden = Collator.getInstance(new Locale("sv", "se"));
+    Collator denmark = Collator.getInstance(new Locale("da", "dk"));
+
+    // java.text.Collator and the ICU Collator order these terms differently;
+    // for Locale.US, java.text.Collator yields "BFJDH".
+    testCollationKeySort(new TestAnalyzer(us),
+                         new TestAnalyzer(france),
+                         new TestAnalyzer(sweden),
+                         new TestAnalyzer(denmark),
+                         "BFJDH");
+  }
+}
Index: contrib/collation/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
===================================================================
--- contrib/collation/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java	(revision 0)
+++ contrib/collation/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java	(revision 0)
@@ -0,0 +1,86 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import com.ibm.icu.text.Collator;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+
+import java.io.Reader;
+import java.util.Locale;
+
+
+public class TestICUCollationKeyAnalyzer extends CollationTestBase {
+
+  // ICU Collator requested for the Farsi ("fa") Locale
+  private Collator farsiCollator = Collator.getInstance(new Locale("fa"));
+  private Analyzer farsiAnalyzer = new ICUCollationKeyAnalyzer(farsiCollator);
+
+  // The base class's original range endpoints, converted to encoded
+  // collation keys under farsiCollator
+  private String firstLower = toEncodedKey(firstRangeBeginningOriginal);
+  private String firstUpper = toEncodedKey(firstRangeEndOriginal);
+  private String secondLower = toEncodedKey(secondRangeBeginningOriginal);
+  private String secondUpper = toEncodedKey(secondRangeEndOriginal);
+
+  // Collation key of the given text under farsiCollator, run through
+  // encodeCollationKey()
+  private String toEncodedKey(String original) {
+    byte[] keyBits = farsiCollator.getCollationKey(original).toByteArray();
+    return encodeCollationKey(keyBits);
+  }
+
+  public void testFarsiQueryParserCollating() throws Exception {
+    testFarsiQueryParserCollating(farsiAnalyzer);
+  }
+
+  public void testFarsiRangeFilterCollating() throws Exception {
+    testFarsiRangeFilterCollating(farsiAnalyzer, firstLower, firstUpper,
+                                  secondLower, secondUpper);
+  }
+
+  public void testFarsiRangeQueryCollating() throws Exception {
+    testFarsiRangeQueryCollating(farsiAnalyzer, firstLower, firstUpper,
+                                 secondLower, secondUpper);
+  }
+
+  public void testFarsiConstantScoreRangeQuery() throws Exception {
+    testFarsiConstantScoreRangeQuery(farsiAnalyzer, firstLower, firstUpper,
+                                     secondLower, secondUpper);
+  }
+
+  // Test using various international locales with accented characters (which
+  // sort differently depending on locale); copied, with slight changes, from
+  // org.apache.lucene.search.TestSort.testInternationalSort()
+  public void testCollationKeySort() throws Exception {
+    Collator us = Collator.getInstance(Locale.US);
+    Collator france = Collator.getInstance(Locale.FRANCE);
+    Collator sweden = Collator.getInstance(new Locale("sv", "se"));
+    Collator denmark = Collator.getInstance(new Locale("da", "dk"));
+
+    // java.text.Collator and the ICU Collator order these terms differently;
+    // for Locale.US, the ICU Collator yields "BFJHD".
+    testCollationKeySort(new ICUCollationKeyAnalyzer(us),
+                         new ICUCollationKeyAnalyzer(france),
+                         new ICUCollationKeyAnalyzer(sweden),
+                         new ICUCollationKeyAnalyzer(denmark),
+                         "BFJHD");
+  }
+}
Index: contrib/collation/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java
===================================================================
--- contrib/collation/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java	(revision 0)
+++ contrib/collation/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java	(revision 0)
@@ -0,0 +1,100 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import com.ibm.icu.text.Collator;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+
+import java.io.Reader;
+import java.util.Locale;
+
+
+public class TestICUCollationKeyFilter extends CollationTestBase {
+
+  // Farsi ICU collator used by the analyzer and to encode the range bounds.
+  private Collator collator = Collator.getInstance(new Locale("fa"));
+  private Analyzer analyzer = new TestAnalyzer(collator);
+
+  // Encoded collation keys for the *Original range endpoints.
+  private String firstRangeBeginning = keyFor(firstRangeBeginningOriginal);
+  private String firstRangeEnd = keyFor(firstRangeEndOriginal);
+  private String secondRangeBeginning = keyFor(secondRangeBeginningOriginal);
+  private String secondRangeEnd = keyFor(secondRangeEndOriginal);
+
+  /** Encodes the collation key that the Farsi collator produces for text. */
+  private String keyFor(String text) {
+    return encodeCollationKey(collator.getCollationKey(text).toByteArray());
+  }
+
+  /** Analyzer chaining a KeywordTokenizer into an ICUCollationKeyFilter. */
+  public class TestAnalyzer extends Analyzer {
+    private Collator collator;
+
+    TestAnalyzer(Collator collator) {
+      this.collator = collator;
+    }
+
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new ICUCollationKeyFilter(new KeywordTokenizer(reader), collator);
+    }
+  }
+
+  // Each test below forwards this class's fixtures to the same-named overload.
+
+  public void testFarsiQueryParserCollating() throws Exception {
+    testFarsiQueryParserCollating(analyzer);
+  }
+
+  public void testFarsiRangeFilterCollating() throws Exception {
+    testFarsiRangeFilterCollating
+      (analyzer, firstRangeBeginning, firstRangeEnd,
+       secondRangeBeginning, secondRangeEnd);
+  }
+
+  public void testFarsiRangeQueryCollating() throws Exception {
+    testFarsiRangeQueryCollating
+      (analyzer, firstRangeBeginning, firstRangeEnd,
+       secondRangeBeginning, secondRangeEnd);
+  }
+
+  public void testFarsiConstantScoreRangeQuery() throws Exception {
+    testFarsiConstantScoreRangeQuery(analyzer, firstRangeBeginning, firstRangeEnd,
+                                     secondRangeBeginning, secondRangeEnd);
+  }
+
+  /**
+   * Tests sorting with accented characters under various international locales.
+   * Copied (and slightly modified) from
+   * org.apache.lucene.search.TestSort.testInternationalSort()
+   */
+  public void testCollationKeySort() throws Exception {
+    Analyzer us = new TestAnalyzer(Collator.getInstance(Locale.US));
+    Analyzer france = new TestAnalyzer(Collator.getInstance(Locale.FRANCE));
+    Analyzer sweden
+      = new TestAnalyzer(Collator.getInstance(new Locale("sv", "se")));
+    Analyzer denmark
+      = new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));
+
+    // ICU's Collator and java.text.Collator order these terms differently;
+    // "BFJHD" is the ordering the ICU Collator produces for Locale.US.
+    testCollationKeySort(us, france, sweden, denmark, "BFJHD");
+  }
+}
Index: contrib/collation/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
===================================================================
--- contrib/collation/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java	(revision 0)
+++ contrib/collation/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java	(revision 0)
@@ -0,0 +1,105 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
+
+import java.text.Collator;
+import java.io.Reader;
+import java.io.IOException;
+
+/**
+ * <p>
+ *   Filters {@link KeywordTokenizer} with {@link CollationKeyFilter}.
+ * </p>
+ * <p>
+ *   Converts the token into its {@link java.text.CollationKey}, and then
+ *   encodes the CollationKey with 
+ *   {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow 
+ *   it to be stored as an index term.
+ * </p>
+ * <p>
+ *   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+ *   index and query time -- CollationKeys are only comparable when produced by
+ *   the same Collator.  Since {@link java.text.RuleBasedCollator}s are not
+ *   independently versioned, it is unsafe to search against stored
+ *   CollationKeys unless the following are exactly the same (best practice is
+ *   to store this information with the index and check that they remain the
+ *   same at query time):
+ * </p>
+ * <ol>
+ *   <li>JVM vendor</li>
+ *   <li>JVM version, including patch version</li>
+ *   <li>
+ *     The language (and country and variant, if specified) of the Locale
+ *     used when constructing the collator via
+ *     {@link Collator#getInstance(java.util.Locale)}.
+ *   </li>
+ *   <li>
+ *     The collation strength used - see {@link Collator#setStrength(int)}
+ *   </li>
+ * </ol> 
+ * <p>
+ *   NB 1: {@link ICUCollationKeyAnalyzer} uses ICU4J's Collator, which makes 
+ *   its version available, thus allowing collation to be versioned
+ *   independently from the JVM.
+ * </p>
+ * <p>
+ *   NB 2: CollationKeys generated by java.text.Collators are not compatible
+ *   with those generated by ICU Collators.  Specifically, if you use
+ *   CollationKeyAnalyzer to generate index terms, do not use
+ *   ICUCollationKeyAnalyzer on the query side, or vice versa.
+ * </p>
+ */
+public class CollationKeyAnalyzer extends Analyzer {
+  private final Collator collator;
+
+  /** @param collator CollationKey generator (constructor is public API) */
+  public CollationKeyAnalyzer(Collator collator) {
+    this.collator = collator;
+  }
+
+  /** Returns a CollationKeyFilter over a new KeywordTokenizer reading reader. */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    return new CollationKeyFilter(new KeywordTokenizer(reader), collator);
+  }
+
+  private class SavedStreams {  // tokenizer/filter pair kept for reuse
+    Tokenizer source;
+    TokenStream result;
+  }
+
+  /** Builds the chain when none is saved; otherwise resets the saved tokenizer. */
+  public TokenStream reusableTokenStream(String fieldName, Reader reader)
+    throws IOException {
+    SavedStreams streams = (SavedStreams)getPreviousTokenStream();
+    if (streams == null) {
+      streams = new SavedStreams();
+      streams.source = new KeywordTokenizer(reader);
+      streams.result = new CollationKeyFilter(streams.source, collator);
+      setPreviousTokenStream(streams);
+    } else {
+      streams.source.reset(reader);
+    }
+    return streams.result;
+  }
+}
Index: contrib/collation/src/java/org/apache/lucene/collation/CollationKeyFilter.java
===================================================================
--- contrib/collation/src/java/org/apache/lucene/collation/CollationKeyFilter.java	(revision 0)
+++ contrib/collation/src/java/org/apache/lucene/collation/CollationKeyFilter.java	(revision 0)
@@ -0,0 +1,102 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.util.IndexableBinaryStringTools;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.text.Collator;
+
+
+/**
+ * <p>
+ *   Converts each token into its {@link java.text.CollationKey}, and then
+ *   encodes the CollationKey with {@link IndexableBinaryStringTools}, to allow 
+ *   it to be stored as an index term.
+ * </p>
+ * <p>
+ *   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+ *   index and query time -- CollationKeys are only comparable when produced by
+ *   the same Collator.  Since {@link java.text.RuleBasedCollator}s are not
+ *   independently versioned, it is unsafe to search against stored
+ *   CollationKeys unless the following are exactly the same (best practice is
+ *   to store this information with the index and check that they remain the
+ *   same at query time):
+ * </p>
+ * <ol>
+ *   <li>JVM vendor</li>
+ *   <li>JVM version, including patch version</li>
+ *   <li>
+ *     The language (and country and variant, if specified) of the Locale
+ *     used when constructing the collator via
+ *     {@link Collator#getInstance(java.util.Locale)}.
+ *   </li>
+ *   <li>
+ *     The collation strength used - see {@link Collator#setStrength(int)}
+ *   </li>
+ * </ol> 
+ * <p>
+ *   NB 1: {@link ICUCollationKeyFilter} uses ICU4J's Collator, which makes its
+ *   version available, thus allowing collation to be versioned independently
+ *   from the JVM.
+ * </p>
+ * <p>
+ *   NB 2: CollationKeys generated by java.text.Collators are not compatible
+ *   with those generated by ICU Collators.  Specifically, if you use
+ *   CollationKeyFilter to generate index terms, do not use
+ *   {@link ICUCollationKeyFilter} on the query side, or vice versa.
+ * </p>
+ */
+public class CollationKeyFilter extends TokenFilter {
+  private Collator collator = null;
+
+  /**
+   * @param input Token stream whose terms are converted
+   * @param collator Produces the CollationKey for each term
+   */
+  public CollationKeyFilter(TokenStream input, Collator collator) {
+    super(input);
+    this.collator = collator;
+  }
+
+  /** Replaces the next input token's term with its encoded collation key. */
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token token = input.next(reusableToken);
+    if (token == null) {
+      return null;  // nothing to convert
+    }
+    String text = new String(token.termBuffer(), 0, token.termLength());
+    ByteBuffer keyBytes
+      = ByteBuffer.wrap(collator.getCollationKey(text).toByteArray());
+    int encodedLength = IndexableBinaryStringTools.getEncodedLength(keyBytes);
+    if (encodedLength > token.termBuffer().length) {
+      token.resizeTermBuffer(encodedLength);  // make room for the key
+    }
+    token.setTermLength(encodedLength);
+    CharBuffer target = CharBuffer.wrap(token.termBuffer());
+    IndexableBinaryStringTools.encode(keyBytes, target);
+    return token;
+  }
+}
Index: contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java
===================================================================
--- contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java	(revision 0)
+++ contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java	(revision 0)
@@ -0,0 +1,96 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import com.ibm.icu.text.Collator;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
+
+import java.io.Reader;
+import java.io.IOException;
+
+
+/**
+ * <p>
+ *   Filters {@link KeywordTokenizer} with {@link ICUCollationKeyFilter}.
+ * </p><p>
+ *   Converts the token into its {@link com.ibm.icu.text.CollationKey}, and
+ *   then encodes the CollationKey with 
+ *   {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow it to
+ *   be stored as an index term.
+ * </p>
+ * <p>
+ *   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+ *   index and query time -- CollationKeys are only comparable when produced by
+ *   the same Collator.  {@link com.ibm.icu.text.RuleBasedCollator}s are 
+ *   independently versioned, so it is safe to search against stored
+ *   CollationKeys if the following are exactly the same (best practice is
+ *   to store this information with the index and check that they remain the
+ *   same at query time):
+ * </p>
+ * <ol>
+ *   <li>
+ *     Collator version - see {@link Collator#getVersion()}
+ *   </li>
+ *   <li>
+ *     The collation strength used - see {@link Collator#setStrength(int)}
+ *   </li>
+ * </ol> 
+ * <p>
+ *   NB: CollationKeys generated by ICU Collators are not compatible with those
+ *   generated by java.text.Collators.  Specifically, if you use 
+ *   ICUCollationKeyAnalyzer to generate index terms, do not use 
+ *   {@link CollationKeyAnalyzer} on the query side, or vice versa.
+ * </p>
+ */
+public class ICUCollationKeyAnalyzer extends Analyzer {
+  private final Collator collator;
+
+  /** @param collator CollationKey generator (constructor is public API) */
+  public ICUCollationKeyAnalyzer(Collator collator) {
+    this.collator = collator;
+  }
+
+  /** Returns an ICUCollationKeyFilter over a new KeywordTokenizer on reader. */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    return new ICUCollationKeyFilter(new KeywordTokenizer(reader), collator);
+  }
+
+  private class SavedStreams {  // tokenizer/filter pair kept for reuse
+    Tokenizer source;
+    TokenStream result;
+  }
+
+  /** Builds the chain when none is saved; otherwise resets the saved tokenizer. */
+  public TokenStream reusableTokenStream(String fieldName, Reader reader)
+    throws IOException {
+    SavedStreams streams = (SavedStreams)getPreviousTokenStream();
+    if (streams == null) {
+      streams = new SavedStreams();
+      streams.source = new KeywordTokenizer(reader);
+      streams.result = new ICUCollationKeyFilter(streams.source, collator);
+      setPreviousTokenStream(streams);
+    } else {
+      streams.source.reset(reader);
+    }
+    return streams.result;
+  }
+}
Index: contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
===================================================================
--- contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java	(revision 0)
+++ contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java	(revision 0)
@@ -0,0 +1,94 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import com.ibm.icu.text.Collator;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.util.IndexableBinaryStringTools;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+
+
+/**
+ * <p>
+ *   Converts each token into its {@link com.ibm.icu.text.CollationKey}, and
+ *   then encodes the CollationKey with {@link IndexableBinaryStringTools}, to
+ *   allow it to be stored as an index term.
+ * </p>
+ * <p>
+ *   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+ *   index and query time -- CollationKeys are only comparable when produced by
+ *   the same Collator.  {@link com.ibm.icu.text.RuleBasedCollator}s are 
+ *   independently versioned, so it is safe to search against stored
+ *   CollationKeys if the following are exactly the same (best practice is
+ *   to store this information with the index and check that they remain the
+ *   same at query time):
+ * </p>
+ * <ol>
+ *   <li>
+ *     Collator version - see {@link Collator#getVersion()}
+ *   </li>
+ *   <li>
+ *     The collation strength used - see {@link Collator#setStrength(int)}
+ *   </li>
+ * </ol> 
+ * <p>
+ *   NB: CollationKeys generated by ICU Collators are not compatible with those
+ *   generated by java.text.Collators.  Specifically, if you use 
+ *   ICUCollationKeyFilter to generate index terms, do not use 
+ *   {@link CollationKeyFilter} on the query side, or vice versa.
+ * </p>
+ */
+public class ICUCollationKeyFilter extends TokenFilter {
+  private Collator collator = null;
+
+  /**
+   * Wraps input so that each term is replaced by its encoded collation key.
+   * @param input Token stream whose terms are converted
+   * @param collator Produces the CollationKey for each term
+   */
+  public ICUCollationKeyFilter(TokenStream input, Collator collator) {
+    super(input);
+    this.collator = collator;
+  }
+
+  /** Replaces the next input token's term with its encoded collation key. */
+  public final Token next(final Token reusableToken) throws IOException {
+    assert reusableToken != null;
+    Token token = input.next(reusableToken);
+    if (token == null) {
+      return null;  // nothing to convert
+    }
+    String text = new String(token.termBuffer(), 0, token.termLength());
+    ByteBuffer keyBytes
+      = ByteBuffer.wrap(collator.getCollationKey(text).toByteArray());
+    int encodedLength = IndexableBinaryStringTools.getEncodedLength(keyBytes);
+    if (encodedLength > token.termBuffer().length) {
+      token.resizeTermBuffer(encodedLength);  // make room for the key
+    }
+    token.setTermLength(encodedLength);
+    CharBuffer target = CharBuffer.wrap(token.termBuffer());
+    IndexableBinaryStringTools.encode(keyBytes, target);
+    return token;
+  }
+}
Index: contrib/collation/src/java/overview.html
===================================================================
--- contrib/collation/src/java/overview.html	(revision 0)
+++ contrib/collation/src/java/overview.html	(revision 0)
@@ -0,0 +1,10 @@
+<html>
+  <head>
+    <title>
+      Apache Lucene CollationKeyFilter/Analyzer and 
+      ICUCollationKeyFilter/Analyzer
+    </title>
+  </head>
+  <body>
+  </body>
+</html>
\ No newline at end of file
Index: contrib/collation/build.xml
===================================================================
--- contrib/collation/build.xml	(revision 0)
+++ contrib/collation/build.xml	(revision 0)
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+ 
+        http://www.apache.org/licenses/LICENSE-2.0
+ 
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="collation" default="default">
+
+  <description>
+    CollationKeyFilter, ICUCollationKeyFilter, CollationKeyAnalyzer, and
+    ICUCollationKeyAnalyzer - converts tokens into indexable collation keys
+  </description>
+
+  <!-- Extra classpath: icu4j jar(s) under lib/ plus build/contrib/misc classes -->
+  <path id="additional.dependencies">
+    <fileset dir="lib" includes="icu4j-*.jar"/>
+    <pathelement location="../../build/contrib/misc/classes/java"/>
+  </path>
+
+  <pathconvert property="project.classpath"
+               targetos="unix"
+               refid="additional.dependencies"
+  /> <!-- exposes the path above as the project.classpath property -->
+
+  <import file="../contrib-build.xml"/>
+
+  <target name="compile-misc"> <!-- runs "compile" in contrib/miscellaneous -->
+    <subant target="compile">
+       <fileset dir="${common.dir}/contrib/miscellaneous" includes="build.xml"/>
+    </subant>
+  </target>
+
+  <target name="init" depends="common.init,compile-misc"/> <!-- misc is built during init -->
+
+  <target name="compile" depends="init">
+    <antcall target="common.compile" inheritRefs="true" /> <!-- references are passed to the called target -->
+  </target>
+
+</project>
Index: lucene-contrib-pom.xml.template
===================================================================
--- lucene-contrib-pom.xml.template	(revision 755833)
+++ lucene-contrib-pom.xml.template	(working copy)
@@ -44,5 +44,6 @@
     <commons-collections-version>3.1</commons-collections-version>
     <commons-beanutils-version>1.7.0</commons-beanutils-version>
     <jakarta-regexp-version>1.4</jakarta-regexp-version>
+    <icu-version>4.0</icu-version>
   </properties>
 </project>
