Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java	(revision 1369250)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java	(working copy)
@@ -64,4 +64,37 @@
     assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
         new String[] { "多", "く", "の",  "学生", "が",  "試験", "に",  "落", "ち", "た" });
   }
+  
+  public void testAllScripts() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(t, 
+            new CJKBigramFilter(t, 0xff, false));
+      }
+    };
+    assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
+        new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
+  }
+  
+  public void testUnigramsAndBigramsAllScripts() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(t, 
+            new CJKBigramFilter(t, 0xff, true));
+      }
+    };
+    assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
+        new String[] { 
+        "多", "多く", "く",  "くの", "の",  "の学", "学", "学生", "生", 
+        "生が", "が",  "が試", "試", "試験", "験", "験に", "に", 
+                "に落", "落", "落ち", "ち", "ちた", "た" 
+        },
+        new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 
+                    0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }
+    );
+  }
 }
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java	(revision 1369250)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java	(working copy)
@@ -33,12 +33,13 @@
  *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
  *     &lt;filter class="solr.CJKBigramFilterFactory" 
  *       han="true" hiragana="true" 
- *       katakana="true" hangul="true" /&gt;
+ *       katakana="true" hangul="true" outputUnigrams="false" /&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
  */
 public class CJKBigramFilterFactory extends TokenFilterFactory {
   int flags;
+  boolean outputUnigrams;
 
   @Override
   public void init(Map<String,String> args) {
@@ -56,10 +57,11 @@
     if (getBoolean("hangul", true)) {
       flags |= CJKBigramFilter.HANGUL;
     }
+    outputUnigrams = getBoolean("outputUnigrams", false);
   }
   
   @Override
   public TokenStream create(TokenStream input) {
-    return new CJKBigramFilter(input, flags);
+    return new CJKBigramFilter(input, flags, outputUnigrams);
   }
 }
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java	(revision 1369250)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java	(working copy)
@@ -24,6 +24,8 @@
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.ArrayUtil;
 
@@ -67,10 +69,16 @@
   private final Object doHiragana;
   private final Object doKatakana;
   private final Object doHangul;
+  
+  // true if unigrams should always be output, in addition to bigrams
+  private final boolean outputUnigrams;
+  private boolean ngramState; // false = output unigram, true = output bigram
     
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
   
   // buffers containing codepoint and offsets in parallel
   int buffer[] = new int[8];
@@ -95,16 +103,28 @@
   }
   
   /** 
-   * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
+   * Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean) CJKBigramFilter(in, flags, false)}
+   */
+  public CJKBigramFilter(TokenStream in, int flags) {
+    this(in, flags, false);
+  }
+  
+  /**
+   * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
+   * and whether or not unigrams should also be output.
    * @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA}, 
    *        {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
+   * @param outputUnigrams true if unigrams for the selected writing systems should also be output.
+   *        When this is false, unigrams are only output when there are no adjacent characters
+   *        to form a bigram.
    */
-  public CJKBigramFilter(TokenStream in, int flags) {
+  public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
     super(in);
     doHan =      (flags & HAN) == 0      ? NO : HAN_TYPE;
     doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
     doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
     doHangul =   (flags & HANGUL) == 0   ? NO : HANGUL_TYPE;
+    this.outputUnigrams = outputUnigrams;
   }
   
   /*
@@ -120,7 +140,17 @@
         // case 1: we have multiple remaining codepoints buffered,
         // so we can emit a bigram here.
         
-        flushBigram();
+        if (outputUnigrams) {
+          if (ngramState) {
+            flushBigram();
+          } else {
+            flushUnigram();
+            index--;
+          }
+          ngramState = !ngramState;
+        } else {
+          flushBigram();
+        }
         return true;
       } else if (doNext()) {
         
@@ -260,6 +290,11 @@
     termAtt.setLength(len2);
     offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
     typeAtt.setType(DOUBLE_TYPE);
+    // when outputting unigrams, all bigrams are synonyms that span two unigrams
+    if (outputUnigrams) {
+      posIncAtt.setPositionIncrement(0);
+      posLengthAtt.setPositionLength(2);
+    }
     index++;
   }
   
@@ -292,7 +327,13 @@
    * inputs.
    */
   private boolean hasBufferedUnigram() {
-    return bufferLen == 1 && index == 0;
+    if (outputUnigrams) {
+      // when always outputting unigrams: exactly one buffered codepoint remains past index
+      return bufferLen - index == 1;
+    } else {
+      // otherwise it's only when we have a lone CJK character
+      return bufferLen == 1 && index == 0;
+    }
   }
 
   @Override
