Index: lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java
===================================================================
--- lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java	(revision 0)
+++ lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java	(working copy)
@@ -0,0 +1,107 @@
+package org.apache.lucene.analysis.icu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharReader;
+import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
+
+import com.ibm.icu.text.Normalizer2;
+
+public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase {
+  
+  public void testNormalization() throws IOException { // each partial read must match the normalized source prefix
+    String input = "ʰ㌰゙5℃№㈱㌘，バッファーの正規化のテスト．㋐㋑㋒㋓㋔ｶｷｸｹｺｻﾞｼﾞｽﾞｾﾞｿﾞg̈각/각நிเกषिchkʷक्षि";
+    Normalizer2 referenceNormalizer = Normalizer2.getInstance(null, "nfkc_cf",
+        Normalizer2.Mode.COMPOSE);
+    String expectedOutput = referenceNormalizer.normalize(input);
+
+    CharStream reader = new ICUNormalizer2CharFilter(CharReader.get(new StringReader(input)));
+    StringBuilder output = new StringBuilder();
+    try {
+      char[] tempBuff = new char[10]; // shorter than the input, so draining takes several read() calls
+      int length;
+      while ((length = reader.read(tempBuff)) != -1) {
+        output.append(tempBuff, 0, length);
+        // Map the output length back to a source offset; normalizing that source prefix must give the output so far.
+        String expectedPrefix = referenceNormalizer.normalize(input.substring(0, reader.correctOffset(output.length())));
+        assertEquals(expectedPrefix, output.toString());
+      }
+    } finally {
+      reader.close(); // close the filter even when an assertion above fails
+    }
+
+    assertEquals(expectedOutput, output.toString());
+  }
+  
+  public void testTokenStream() throws IOException {
+    // '℃', '№', '㈱', '㌘', 'ｻ'+'<<', 'ｿ'+'<<', '㌰'+'<<'
+    final String input = "℃ № ㈱ ㌘ ｻﾞ ｿﾞ ㌰ﾞ";
+    final Normalizer2 nfkc = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);
+    final CharStream filtered = new ICUNormalizer2CharFilter(CharReader.get(new StringReader(input)), nfkc);
+    final TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+
+    // Expected offsets are positions in the un-normalized input, not in the normalized text.
+    final String[] expectedTerms = {"°C", "No", "(株)", "グラム", "ザ", "ゾ", "ピゴ"};
+    final int[] expectedStarts = {0, 2, 4, 6, 8, 11, 14};
+    final int[] expectedEnds = {1, 3, 5, 7, 10, 13, 16};
+
+    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.length());
+  }
+  
+  public void testTokenStream2() throws IOException {
+    // '㌰', '<<'゙, '5', '℃', '№', '㈱', '㌘', 'ｻ', '<<', 'ｿ', '<<'
+    final String input = "㌰゙5℃№㈱㌘ｻﾞｿﾞ";
+    // Single-argument constructor, i.e. the filter's default normalizer.
+    final CharStream filtered = new ICUNormalizer2CharFilter(CharReader.get(new StringReader(input)));
+    final TokenStream unigrams = new NGramTokenizer(filtered, 1, 1);
+
+    // Tokens are single normalized chars; several of them may share the offsets of one source char.
+    final String[] expectedTerms =
+        {"ピ", "ゴ", "5", "°", "c", "n", "o", "(", "株", ")", "グ", "ラ", "ム", "ザ", "ゾ"};
+    final int[] expectedStarts = {0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9};
+    final int[] expectedEnds = {1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9, 11};
+
+    assertTokenStreamContents(unigrams, expectedTerms, expectedStarts, expectedEnds, input.length());
+  }
+  
+  public void testRandomStrings() throws IOException {
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected Reader initReader(Reader reader) { // wrap every input in the char filter under test
+        return new ICUNormalizer2CharFilter(CharReader.get(reader));
+      }
+
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+      }
+    };
+    checkRandomData(random(), analyzer, 10000 * RANDOM_MULTIPLIER);
+  }
+  
+}

Property changes on: lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java
===================================================================
--- lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java	(revision 0)
+++ lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java	(working copy)
@@ -0,0 +1,226 @@
+package org.apache.lucene.analysis.icu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.charfilter.BaseCharFilter;
+import org.apache.lucene.analysis.CharStream;
+
+import com.ibm.icu.text.Normalizer2;
+
+/** Just like {@link ICUNormalizer2Filter}, but as a CharFilter **/
+public class ICUNormalizer2CharFilter extends BaseCharFilter { 
+  private static final int IO_BUFFER_SIZE = 128; // length of tmpBuffer, the array filled by each read of the wrapped stream
+  
+  private final Normalizer2 normalizer;
+  private final StringBuilder inputBuffer = new StringBuilder(); // chars read from the input, not yet normalized
+  private final StringBuilder resultBuffer = new StringBuilder(); // processed chars not yet returned by read()
+  
+  private boolean inputFinished; // set once the wrapped stream has returned -1
+  private boolean afterQuickCheckYes; // true once the quick-check pass has run on the current buffer head
+  private int checkedInputBoundary; // index in inputBuffer up to which the boundary search has advanced
+  private int charCount; // running count of output chars; base position for offset corrections
+  
+  /**
+   * Creates a filter over {@code in} using the "nfkc_cf" {@link Normalizer2} instance in COMPOSE mode.
+   */
+  public ICUNormalizer2CharFilter(CharStream in) {
+    this(in, Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));
+  }
+  /** Creates a filter over {@code in} that applies {@code normalizer}; throws NullPointerException if it is null. */
+  public ICUNormalizer2CharFilter(CharStream in, Normalizer2 normalizer) {
+    super(in);
+    if (normalizer == null) throw new NullPointerException("normalizer must not be null");
+    this.normalizer = normalizer;
+  }
+  /** Delegates to super.reset(), then clears this filter's buffers, flags and counters. */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    resetFields();
+  }
+  /** Returns both buffers, the flags and the counters to their initial (empty / false / zero) state. */
+  private void resetFields() {
+    inputBuffer.setLength(0);
+    resultBuffer.setLength(0);
+    checkedInputBoundary = 0;
+    charCount = 0;
+    inputFinished = false;
+    afterQuickCheckYes = false;
+  }
+  /** Clears this filter's internal state, then delegates to super.close(). */
+  @Override
+  public void close() throws IOException {
+    resetFields();
+    super.close();
+  }
+  /** Reads up to {@code len} normalized chars into {@code cbuf} from index {@code off}; returns the count, or -1 when exhausted. */
+  @Override
+  public int read(char[] cbuf, int off, int len) throws IOException {
+    if (off < 0) {
+      throw new IllegalArgumentException("off < 0");
+    }
+    if (off >= cbuf.length) {
+      throw new IllegalArgumentException("off >= cbuf.length");
+    }
+    if (len <= 0) {
+      throw new IllegalArgumentException("len <= 0");
+    }
+
+    // Loop until the source is exhausted and both internal buffers are empty.
+    while (!(inputFinished && inputBuffer.length() == 0 && resultBuffer.length() == 0)) {
+      // 1) Hand out output that is already waiting in resultBuffer.
+      if (resultBuffer.length() > 0) {
+        final int copied = outputFromResultBuffer(cbuf, off, len);
+        if (copied > 0) return copied;
+      }
+      // 2) Otherwise process buffered input and hand out what that produced.
+      if (readAndNormalizeFromInput() > 0) {
+        final int copied = outputFromResultBuffer(cbuf, off, len);
+        if (copied > 0) return copied;
+      }
+      // 3) Nothing to deliver yet: pull more chars from the wrapped stream.
+      readInputToBuffer();
+    }
+
+    return -1;
+  }
+  
+  private final char[] tmpBuffer = new char[IO_BUFFER_SIZE]; // scratch array for one read of the wrapped stream
+  /** Appends the next chunk of the wrapped stream to inputBuffer; returns chars read, or 0 after flagging end of input. */
+  private int readInputToBuffer() throws IOException {
+    final int count = input.read(tmpBuffer);
+    if (count != -1) {
+      inputBuffer.append(tmpBuffer, 0, count);
+      return count;
+    }
+    inputFinished = true;
+    return 0;
+  }
+  /** Transfers the next piece of inputBuffer (an unchanged prefix or one normalized chunk) to resultBuffer; returns chars added. */
+  private int readAndNormalizeFromInput() {
+    if (inputBuffer.length() <= 0) {
+      afterQuickCheckYes = false; // the next non-empty buffer starts with the quick-check pass again
+      return 0;
+    }
+    if (!afterQuickCheckYes) {
+      // Cheap pass first: copy the prefix that the quick check accepts unchanged.
+      final int passedThrough = readFromInputWhileSpanQuickCheckYes();
+      afterQuickCheckYes = true;
+      if (passedThrough > 0) return passedThrough;
+    }
+    // Full normalization of the chars up to the next boundary, if one is known yet.
+    final int normalized = readFromIoNormalizeUptoBoundary(resultBuffer);
+    afterQuickCheckYes = normalized <= 0; // the flag is always true here; clear it once output was produced
+    return normalized;
+  }
+  /** Copies the leading span of inputBuffer that spanQuickCheckYes accepts straight to resultBuffer; returns its length. */
+  private int readFromInputWhileSpanQuickCheckYes() {
+    final int span = normalizer.spanQuickCheckYes(inputBuffer);
+    if (span <= 0) return span;
+    // The span is emitted as-is, so input and output advance by the same amount.
+    resultBuffer.append(inputBuffer, 0, span);
+    inputBuffer.delete(0, span);
+    checkedInputBoundary = Math.max(0, checkedInputBoundary - span);
+    charCount += span;
+    return span;
+  }
+  
+  /** Normalizes input up to the next boundary, testing whole code points; returns the appended length, 0 if none yet. */
+  private int readFromIoNormalizeUptoBoundary(StringBuilder dest) { // dest is unused; output goes to resultBuffer
+    if (inputBuffer.length() <= 0) return 0;
+    boolean foundBoundary = false;
+    final int bufLen = inputBuffer.length();
+    while (checkedInputBoundary < bufLen - 1) {
+      ++checkedInputBoundary;
+      final char cur = inputBuffer.charAt(checkedInputBoundary);
+      if (Character.isLowSurrogate(cur)
+          && Character.isHighSurrogate(inputBuffer.charAt(checkedInputBoundary - 1))) {
+        continue; // inside a surrogate pair: never a boundary
+      }
+      // A high surrogate that ends the buffer may still be completed by the next read.
+      final boolean dangling = !inputFinished && checkedInputBoundary == bufLen - 1 && Character.isHighSurrogate(cur);
+      if (normalizer.hasBoundaryAfter(inputBuffer.codePointBefore(checkedInputBoundary))
+          || (!dangling && normalizer.hasBoundaryBefore(inputBuffer.codePointAt(checkedInputBoundary)))) {
+        foundBoundary = true;
+        break;
+      }
+      if (dangling) {
+        --checkedInputBoundary; // re-examine this position once the pair is complete
+        break;
+      }
+    }
+    if (checkedInputBoundary == bufLen - 1) {
+      final boolean splitPair = Character.isHighSurrogate(inputBuffer.charAt(bufLen - 1));
+      if (inputFinished || (!splitPair && normalizer.hasBoundaryAfter(inputBuffer.codePointBefore(bufLen)))) {
+        foundBoundary = true;
+        ++checkedInputBoundary;
+      }
+    }
+    return foundBoundary ? normalizeInputUpto(checkedInputBoundary) : 0;
+  }
+  /** Normalizes the first {@code length} chars of inputBuffer onto resultBuffer, records the offset change; returns the growth. */
+  private int normalizeInputUpto(final int length) {
+    final int before = resultBuffer.length();
+    final CharSequence head = inputBuffer.subSequence(0, length);
+    normalizer.normalizeSecondAndAppend(resultBuffer, head);
+    inputBuffer.delete(0, length);
+    checkedInputBoundary = Math.max(0, checkedInputBoundary - length);
+    final int produced = resultBuffer.length() - before;
+    recordOffsetDiff(length, produced);
+    return produced;
+  }
+  /** Registers offset corrections for a chunk whose length changed from inputLength to outputLength, then advances charCount. */
+  private void recordOffsetDiff(int inputLength, int outputLength) {
+    final int shrink = inputLength - outputLength;
+    if (shrink != 0) {
+      final int base = getLastCumulativeDiff();
+      if (shrink > 0) {
+        // Output is shorter: one correction just past the chunk start carries the whole difference.
+        addOffCorrectMap(charCount + 1, base + shrink);
+      } else {
+        // Output is longer: lower the cumulative diff by one for each extra output char.
+        for (int step = 1; step <= -shrink; step++) {
+          addOffCorrectMap(charCount + step, base - step);
+        }
+      }
+    }
+    charCount += outputLength;
+  }
+  /** Moves up to {@code len} chars from the front of resultBuffer into {@code cbuf} at {@code begin}; returns the count. */
+  private int outputFromResultBuffer(char[] cbuf, int begin, int len) {
+    final int count = Math.min(len, resultBuffer.length());
+    resultBuffer.getChars(0, count, cbuf, begin);
+    if (count <= 0) return count;
+    // Drop what was just handed out so the next call starts at fresh data.
+    resultBuffer.delete(0, count);
+    return count;
+  }
+  /** Always returns {@code false}; see mark(), which throws. */
+  @Override
+  public boolean markSupported() {
+    return false;
+  }
+  /** Unsupported: always throws {@link UnsupportedOperationException}. */
+  @Override
+  public void mark(int readAheadLimit) throws IOException {
+    throw new UnsupportedOperationException("mark() not supported");
+  }
+  
+}
\ No newline at end of file

Property changes on: lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2CharFilterFactory.java
===================================================================
--- solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2CharFilterFactory.java	(revision 0)
+++ solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2CharFilterFactory.java	(working copy)
@@ -0,0 +1,43 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+
+import com.ibm.icu.text.Normalizer2;
+
+/** Factory for {@link ICUNormalizer2CharFilter}; the normalizer is built from the init args by {@link ICUNormalizer2FilterFactory}. */
+public class ICUNormalizer2CharFilterFactory extends CharFilterFactory {
+  private Normalizer2 normalizer; // assigned in init(); not set before that
+
+  // TODO: support custom normalization
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    normalizer = ICUNormalizer2FilterFactory.parseNormalizer(args); // same argument parsing as the token-filter factory
+  }
+  /** Wraps {@code in} in a char filter that applies the normalizer chosen in init(). */
+  @Override
+  public CharStream create(CharStream in) {
+    return new ICUNormalizer2CharFilter(in, normalizer);
+  }
+}

Property changes on: solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2CharFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java
===================================================================
--- solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java	(revision 1342632)
+++ solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUNormalizer2FilterFactory.java	(working copy)
@@ -50,10 +50,15 @@
 public class ICUNormalizer2FilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
   private Normalizer2 normalizer;
 
-  // TODO: support custom normalization
   @Override
   public void init(Map<String,String> args) {
     super.init(args);
+    normalizer = parseNormalizer(args);
+  }
+
+  // TODO: support custom normalization
+  static Normalizer2 parseNormalizer(Map<String,String> args) {
+    Normalizer2 normalizer;
     String name = args.get("name");
     if (name == null)
       name = "nfkc_cf";
@@ -76,6 +81,8 @@
         normalizer = new FilteredNormalizer2(normalizer, set);
       }
     }
+    
+    return normalizer;
   }
   
   public TokenStream create(TokenStream input) {
