Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java	(revision 831430)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java	(working copy)
@@ -39,11 +39,6 @@
  */
 public final class SentenceTokenizer extends Tokenizer {
 
-  /**
-   * End of sentence punctuation: 。，！？；,!?;
-   */
-  private final static String PUNCTION = "。，！？；,!?;";
-
   private final StringBuilder buffer = new StringBuilder();
 
   private int tokenStart = 0, tokenEnd = 0;
@@ -86,12 +81,12 @@
     while (true) {
       if (ci == -1) {
         break;
-      } else if (PUNCTION.indexOf(ch) != -1) {
+      } else if (isChinesePunctuation(ch)) {
         // End of a sentence
         buffer.append(ch);
         tokenEnd++;
         break;
-      } else if (atBegin && Utility.SPACES.indexOf(ch) != -1) {
+      } else if (atBegin && isChineseSpace(ch)) {
         tokenStart++;
         tokenEnd++;
         ci = input.read();
@@ -104,8 +99,7 @@
         ci = input.read();
         ch = (char) ci;
         // Two spaces, such as CR, LF
-        if (Utility.SPACES.indexOf(ch) != -1
-            && Utility.SPACES.indexOf(pch) != -1) {
+        if (isChineseSpace(ch) && isChineseSpace(pch)) {
           // buffer.append(ch);
           tokenEnd++;
           break;
@@ -115,7 +109,13 @@
     if (buffer.length() == 0)
       return false;
     else {
-      termAtt.setTermBuffer(buffer.toString());
+      final int length = buffer.length();
+      char termBuffer[] = termAtt.termBuffer();
+      if (termBuffer.length < length)
+        termBuffer = termAtt.resizeTermBuffer(length);
+      
+      buffer.getChars(0, length, termBuffer, 0);
+      termAtt.setTermLength(length);
       offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
       typeAtt.setType("sentence");
       return true;
@@ -131,4 +131,34 @@
     super.reset(input);
     reset();
   }
+  
+  private boolean isChinesePunctuation(int ch) {
+    switch(ch) {
+      case '。':
+      case '，':
+      case '！':
+      case '？':
+      case '；':
+      case ',':
+      case '!':
+      case '?':
+      case ';':
+        return true;
+      default:
+        return false;
+    }
+  }
+  
+  private boolean isChineseSpace(int ch) {
+    switch(ch) {
+      case ' ':
+      case '　':
+      case '\t':
+      case '\r':
+      case '\n':
+        return true;
+      default:
+        return false;
+    }
+  }
 }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java	(revision 831638)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java	(working copy)
@@ -18,10 +18,7 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.lucene.analysis.cn.smart.Utility;
 
@@ -38,14 +35,19 @@
  */
 class BiSegGraph {
 
-  private Map<Integer,ArrayList<SegTokenPair>> tokenPairListTable = new HashMap<Integer,ArrayList<SegTokenPair>>();
+  private final ArrayList<SegTokenPair> tokenPairListTable[];
+  private int tableSize = 0; /* number of mappings in tokenPairListTable */
+  
+  private final List<SegToken> segTokenList;
 
-  private List<SegToken> segTokenList;
-
   private static BigramDictionary bigramDict = BigramDictionary.getInstance();
 
+  @SuppressWarnings("unchecked")
   public BiSegGraph(SegGraph segGraph) {
+    // get the list of tokens ordered and indexed
     segTokenList = segGraph.makeIndex();
+    SegToken lastToken = segTokenList.get(segTokenList.size() - 1);
+    tokenPairListTable = (ArrayList<SegTokenPair>[]) new ArrayList[lastToken.index + 1];
     generateBiSegGraph(segGraph);
   }
 
@@ -60,8 +62,7 @@
 
     int next;
     char[] idBuffer;
-    // get the list of tokens ordered and indexed
-    segTokenList = segGraph.makeIndex();
+
     // Because the beginning position of startToken is -1, therefore startToken can be obtained when key = -1
     int key = -1;
     List<SegToken> nextTokens = null;
@@ -71,7 +72,8 @@
         List<SegToken> tokenList = segGraph.getStartList(key);
 
         // Calculate all tokens for a given key.
-        for (SegToken t1 : tokenList) {
+        for (int i = 0; i < tokenList.size(); i++) {
+          SegToken t1 = tokenList.get(i);
           oneWordFreq = t1.weight;
           next = t1.endOffset;
           nextTokens = null;
@@ -89,7 +91,8 @@
           if (nextTokens == null) {
             break;
           }
-          for (SegToken t2 : nextTokens) {
+          for (int j = 0; j < nextTokens.size(); j++) {
+            SegToken t2 = nextTokens.get(j);
             idBuffer = new char[t1.charArray.length + t2.charArray.length + 1];
             System.arraycopy(t1.charArray, 0, idBuffer, 0, t1.charArray.length);
             idBuffer[t1.charArray.length] = BigramDictionary.WORD_SEGMENT_CHAR;
@@ -127,7 +130,8 @@
    * @return true if a token pair exists
    */
   public boolean isToExist(int to) {
-    return tokenPairListTable.get(Integer.valueOf(to)) != null;
+    assert(to < tokenPairListTable.length);
+    return tokenPairListTable[to] != null;
   }
 
   /**
@@ -137,7 +141,8 @@
    * @return {@link List} of token pairs.
    */
   public List<SegTokenPair> getToList(int to) {
-    return tokenPairListTable.get(to);
+    assert(to < tokenPairListTable.length);
+    return tokenPairListTable[to];
   }
 
   /**
@@ -146,23 +151,24 @@
    * @param tokenPair {@link SegTokenPair}
    */
   public void addSegTokenPair(SegTokenPair tokenPair) {
-    int to = tokenPair.to;
+    final int to = tokenPair.to;
     if (!isToExist(to)) {
       ArrayList<SegTokenPair> newlist = new ArrayList<SegTokenPair>();
       newlist.add(tokenPair);
-      tokenPairListTable.put(to, newlist);
+      tokenPairListTable[to] = newlist;
+      tableSize++;
     } else {
-      List<SegTokenPair> tokenPairList = tokenPairListTable.get(to);
+      List<SegTokenPair> tokenPairList = tokenPairListTable[to];
       tokenPairList.add(tokenPair);
     }
   }
 
   /**
-   * Get the number of {@link SegTokenPair} entries in the table.
-   * @return number of {@link SegTokenPair} entries
+   * Get the number of mappings in the table.
+   * @return table size
    */
   public int getToCount() {
-    return tokenPairListTable.size();
+    return tableSize;
   }
 
   /**
@@ -183,7 +189,8 @@
 
       double minWeight = Double.MAX_VALUE;
       SegTokenPair minEdge = null;
-      for (SegTokenPair edge : edges) {
+      for (int i = 0; i < edges.size(); i++) {
+        SegTokenPair edge = edges.get(i);
         weight = edge.weight;
         PathNode preNode = path.get(edge.from);
         if (preNode.weight + weight < minWeight) {
@@ -206,14 +213,13 @@
 
     rpath.add(current);
     while (current != 0) {
-      PathNode currentPathNode = (PathNode) path.get(current);
+      PathNode currentPathNode = path.get(current);
       preNode = currentPathNode.preNode;
-      rpath.add(Integer.valueOf(preNode));
+      rpath.add(preNode);
       current = preNode;
     }
     for (int j = rpath.size() - 1; j >= 0; j--) {
-      Integer idInteger = (Integer) rpath.get(j);
-      int id = idInteger.intValue();
+      int id = rpath.get(j);
       SegToken t = segTokenList.get(id);
       resultPath.add(t);
     }
@@ -223,12 +229,11 @@
 
   public String toString() {
     StringBuilder sb = new StringBuilder();
-    Collection<ArrayList<SegTokenPair>>  values = tokenPairListTable.values();
-    for (ArrayList<SegTokenPair> segList : values) {
-      for (SegTokenPair pair : segList) {
-        sb.append(pair + "\n");
-      }
-    }
+    for (int i = 0; i < tokenPairListTable.length; i++)
+      if (tokenPairListTable[i] != null)
+        for (SegTokenPair pair : tokenPairListTable[i])
+          sb.append(pair + "\n");
+        
     return sb.toString();
   }
 
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java	(revision 831638)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java	(working copy)
@@ -60,8 +60,6 @@
 
   private int max = 0;
 
-  private int repeat = 0;
-
   // static Logger log = Logger.getLogger(BigramDictionary.class);
 
   public synchronized static BigramDictionary getInstance() {
@@ -250,12 +248,10 @@
       hash2 = PRIME_BIGRAM_LENGTH + hash2;
     int index = hash1;
     int i = 1;
-    repeat++;
     while (bigramHashTable[index] != 0 && bigramHashTable[index] != hashId
         && i < PRIME_BIGRAM_LENGTH) {
       index = (hash1 + i * hash2) % PRIME_BIGRAM_LENGTH;
       i++;
-      repeat++;
       if (i > max)
         max = i;
     }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java	(revision 831638)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java	(working copy)
@@ -54,7 +54,7 @@
     int wordType;
     char[] charArray;
 
-    SegGraph segGraph = new SegGraph();
+    SegGraph segGraph = new SegGraph(length);
     while (i < length) {
       hasFullWidth = false;
       switch (charTypeArray[i]) {
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java	(revision 831638)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java	(working copy)
@@ -18,9 +18,7 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 /**
  * Graph representing possible tokens at each start offset in the sentence.
@@ -37,19 +35,30 @@
 
   /**
    * Map of start offsets to ArrayList of tokens at that position
+   * Note: offset i is stored as array index i + 1.
+   * This is because the sentence start token is stored at position -1
    */
-  private Map<Integer,ArrayList<SegToken>> tokenListTable = new HashMap<Integer,ArrayList<SegToken>>();
-
+  private final ArrayList<SegToken> tokenListTable[];
+  private int tableSize = 0; /* number of mappings in tokenListTable */
   private int maxStart = -1;
 
   /**
+   * Create a new SegGraph capable of representing a sentence of maxSize.
+   */
+  @SuppressWarnings("unchecked")
+  SegGraph(int maxSize) {
+    // sentence size, plus sentence start marker, plus sentence end marker
+    tokenListTable = (ArrayList<SegToken>[]) new ArrayList[maxSize + 2];
+  }
+  /**
    * Returns true if a mapping for the specified start offset exists
    * 
    * @param s startOffset
    * @return true if there are tokens for the startOffset
    */
   public boolean isStartExist(int s) {
-    return tokenListTable.get(s) != null;
+    assert(s + 1 < tokenListTable.length);
+    return tokenListTable[s + 1] != null;
   }
 
   /**
@@ -59,7 +68,8 @@
    * @return List of tokens at the specified start offset.
    */
   public List<SegToken> getStartList(int s) {
-    return tokenListTable.get(s);
+    assert(s + 1 < tokenListTable.length);
+    return tokenListTable[s + 1];
   }
 
   /**
@@ -77,13 +87,14 @@
    */
   public List<SegToken> makeIndex() {
     List<SegToken> result = new ArrayList<SegToken>();
-    int s = -1, count = 0, size = tokenListTable.size();
+    int s = -1, count = 0, size = tableSize;
     List<SegToken> tokenList;
     short index = 0;
     while (count < size) {
       if (isStartExist(s)) {
-        tokenList = tokenListTable.get(s);
-        for (SegToken st : tokenList) {
+        tokenList = getStartList(s);
+        for (int i = 0; i < tokenList.size(); i++) {
+          SegToken st = tokenList.get(i);
           st.index = index;
           result.add(st);
           index++;
@@ -104,9 +115,10 @@
     if (!isStartExist(s)) {
       ArrayList<SegToken> newlist = new ArrayList<SegToken>();
       newlist.add(token);
-      tokenListTable.put(s, newlist);
+      tokenListTable[s + 1] = newlist;
+      tableSize++;
     } else {
-      List<SegToken> tokenList = tokenListTable.get(s);
+      List<SegToken> tokenList = getStartList(s);
       tokenList.add(token);
     }
     if (s > maxStart)
@@ -120,14 +132,14 @@
    */
   public List<SegToken> toTokenList() {
     List<SegToken> result = new ArrayList<SegToken>();
-    int s = -1, count = 0, size = tokenListTable.size();
+    int s = -1, count = 0, size = tableSize;
     List<SegToken> tokenList;
 
     while (count < size) {
       if (isStartExist(s)) {
-        tokenList = tokenListTable.get(s);
-        for (SegToken st : tokenList) {
-          result.add(st);
+        tokenList = getStartList(s);
+        for (int i = 0; i < tokenList.size(); i++) {
+          result.add(tokenList.get(i));
         }
         count++;
       }
Index: contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
===================================================================
--- contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java	(revision 831430)
+++ contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java	(working copy)
@@ -94,6 +94,14 @@
     assertAnalyzesTo(ca, sentence, result, startOffsets, endOffsets, posIncr);
   }
   
+  /*
+   * Test analyzing an empty document
+   */
+  public void testEmpty() throws Exception {
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
+    assertAnalyzesTo(ca, "", new String[] {});
+  }
+  
   public void testChineseAnalyzer() throws Exception {
     Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true);
     String sentence = "我购买了道具和服装。";
