Index: contrib/analyzers/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java
===================================================================
--- contrib/analyzers/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java	(revision 0)
+++ contrib/analyzers/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java	(revision 0)
@@ -0,0 +1,155 @@
+package org.apache.lucene.analysis.cjk;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.Token;
+
+
+public class TestCJKTokenizer extends TestCase {
+
+  public Token newToken(String termText, int start, int end, int type) {
+    Token token = new Token(start, end);
+    token.setTermBuffer(termText);
+    token.setType(CJKTokenizer.TOKEN_TYPE_NAMES[type]);
+    return token;
+  }
+
+  public void checkCJKToken(final String str, final Token[] out_tokens) throws IOException {
+    CJKTokenizer tokenizer = new CJKTokenizer(new StringReader(str));
+    int i = 0;
+    System.out.println("string[" + str + "]");
+    System.out.print("tokens[");
+    final Token reusableToken = new Token();
+    for (Token token = tokenizer.next(reusableToken) ;
+         token != null                               ; 
+         token = tokenizer.next(reusableToken)       ) {
+      if (token.term().equals(out_tokens[i].term()) 
+          && token.startOffset() == out_tokens[i].startOffset() 
+          && token.endOffset() == out_tokens[i].endOffset() 
+          && token.type().equals(out_tokens[i].type()) ) {
+        System.out.print( token.term() + " ");
+      }
+      else {
+        fail(token.term() + " (start: " + token.startOffset() 
+             + " end: " + token.endOffset() + " type: " + token.type() + ") != "
+             + out_tokens[i].term() + " (start: " + out_tokens[i].startOffset() 
+             + " end: " + out_tokens[i].endOffset() 
+             + " type: " + out_tokens[i].type() + ")");
+        break;
+      }
+      ++i;
+    }
+    System.out.println("]" + System.getProperty("line.separator"));
+  }
+  
+  public void testJa1() throws IOException {
+    String str = "一二三四五六七八九十";
+       
+    Token[] out_tokens = { 
+      newToken("一二", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("二三", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("三四", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("四五", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("五六", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("六七", 5, 7, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("七八", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("八九", 7, 9, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("九十", 8,10, CJKTokenizer.DOUBLE_TOKEN_TYPE)
+    };
+    checkCJKToken(str, out_tokens);
+  }
+  
+  public void testJa2() throws IOException {
+    String str = "一 二三四 五六七八九 十";
+       
+    Token[] out_tokens = { 
+      newToken("一", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("二三", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("三四", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("五六", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("六七", 7, 9, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("七八", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("八九", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("十", 12,13, CJKTokenizer.DOUBLE_TOKEN_TYPE)
+    };
+    checkCJKToken(str, out_tokens);
+  }
+  
+  public void testC() throws IOException {
+    String str = "abc defgh ijklmn opqrstu vwxy z";
+       
+    Token[] out_tokens = { 
+      newToken("abc", 0, 3, CJKTokenizer.SINGLE_TOKEN_TYPE), 
+      newToken("defgh", 4, 9, CJKTokenizer.SINGLE_TOKEN_TYPE),
+      newToken("ijklmn", 10, 16, CJKTokenizer.SINGLE_TOKEN_TYPE),
+      newToken("opqrstu", 17, 24, CJKTokenizer.SINGLE_TOKEN_TYPE), 
+      newToken("vwxy", 25, 29, CJKTokenizer.SINGLE_TOKEN_TYPE), 
+      newToken("z", 30, 31, CJKTokenizer.SINGLE_TOKEN_TYPE),
+    };
+    checkCJKToken(str, out_tokens);
+  }
+  
+  public void testMix() throws IOException {
+    String str = "あいうえおabcかきくけこ";
+       
+    Token[] out_tokens = { 
+      newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("いう", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("うえ", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("えお", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("abc", 5, 8, CJKTokenizer.SINGLE_TOKEN_TYPE), 
+      newToken("かき", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("きく", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("くけ", 10,12, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("けこ", 11,13, CJKTokenizer.DOUBLE_TOKEN_TYPE)
+    };
+    checkCJKToken(str, out_tokens);
+  }
+  
+  public void testMix2() throws IOException {
+    String str = "あいうえおabんcかきくけ こ";
+       
+    Token[] out_tokens = { 
+      newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("いう", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("うえ", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("えお", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("ab", 5, 7, CJKTokenizer.SINGLE_TOKEN_TYPE), 
+      newToken("ん", 7, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+      newToken("c", 8, 9, CJKTokenizer.SINGLE_TOKEN_TYPE), 
+      newToken("かき", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("きく", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("くけ", 11,13, CJKTokenizer.DOUBLE_TOKEN_TYPE),
+      newToken("こ", 14,15, CJKTokenizer.DOUBLE_TOKEN_TYPE)
+    };
+    checkCJKToken(str, out_tokens);
+  }
+
+  public void testSingleChar() throws IOException {
+    String str = "一";
+       
+    Token[] out_tokens = { 
+      newToken("一", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE), 
+    };
+    checkCJKToken(str, out_tokens);
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java	(revision 699031)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java	(working copy)
@@ -37,7 +37,18 @@
  */
 public final class CJKTokenizer extends Tokenizer {
     //~ Static fields/initializers ---------------------------------------------
+    /** Word token type */
+    static final int WORD_TYPE = 0;
+  
+    /** Single byte token type */
+    static final int SINGLE_TOKEN_TYPE = 1;
 
+    /** Double byte token type */
+    static final int DOUBLE_TOKEN_TYPE = 2;
+  
+    /** Names for token types */
+    static final String[] TOKEN_TYPE_NAMES = { "word", "single", "double" };
+  
     /** Max word length */
     private static final int MAX_WORD_LEN = 255;
 
@@ -68,7 +79,7 @@
     private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
 
     /** word type: single=>ASCII  double=>non-ASCII word=>default */
-    private String tokenType = "word";
+    private int tokenType = WORD_TYPE;
 
     /**
      * tag: previous character is a cached double-byte character  "C1C2C3C4"
@@ -148,7 +159,7 @@
                     || (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
                ) {
                 if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
-                    /** convert  HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN */
+                    // convert  HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN
                     int i = (int) c;
                     i = i - 65248;
                     c = (char) i;
@@ -163,19 +174,17 @@
                         //      ^--: the current character begin to token the ASCII
                         // letter
                         start = offset - 1;
-                    } else if (tokenType == "double") {
+                    } else if (tokenType == DOUBLE_TOKEN_TYPE) {
                         // "javaC1C2C3C4linux" <br>
                         //              ^--: the previous non-ASCII
                         // : the current character
                         offset--;
                         bufferIndex--;
-                        tokenType = "single";
 
                         if (preIsTokened == true) {
                             // there is only one non-ASCII has been stored
                             length = 0;
                             preIsTokened = false;
-
                             break;
                         } else {
                             break;
@@ -184,7 +193,7 @@
 
                     // store the LowerCase(c) in the buffer
                     buffer[length++] = Character.toLowerCase(c);
-                    tokenType = "single";
+                    tokenType = SINGLE_TOKEN_TYPE;
 
                     // break the procedure if buffer overflowed!
                     if (length == MAX_WORD_LEN) {
@@ -204,9 +213,9 @@
                     if (length == 0) {
                         start = offset - 1;
                         buffer[length++] = c;
-                        tokenType = "double";
+                        tokenType = DOUBLE_TOKEN_TYPE;
                     } else {
-                        if (tokenType == "single") {
+                        if (tokenType == SINGLE_TOKEN_TYPE) {
                             offset--;
                             bufferIndex--;
 
@@ -214,7 +223,7 @@
                             break;
                         } else {
                             buffer[length++] = c;
-                            tokenType = "double";
+                            tokenType = DOUBLE_TOKEN_TYPE;
 
                             if (length == 2) {
                                 offset--;
@@ -236,7 +245,15 @@
                 }
             }
         }
-
-        return reusableToken.reinit(buffer, 0, length, start, start+length, tokenType);
+      
+        if (length > 0) {
+            return reusableToken.reinit
+                (buffer, 0, length, start, start+length, TOKEN_TYPE_NAMES[tokenType]);
+        } else if (dataLen != -1) {
+            // Don't return an empty string - recurse to get the next token
+            return next(reusableToken);
+        } else {
+            return null;
+        }
     }
 }
