Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java	(working copy)
@@ -39,11 +39,6 @@
  */
 public final class SentenceTokenizer extends Tokenizer {
 
-  /**
-   * End of sentence punctuation: 。，！？；,!?;
-   */
-  private final static String PUNCTION = "。，！？；,!?;";
-
   private final StringBuilder buffer = new StringBuilder();
 
   private int tokenStart = 0, tokenEnd = 0;
@@ -86,12 +81,12 @@
     while (true) {
       if (ci == -1) {
         break;
-      } else if (PUNCTION.indexOf(ch) != -1) {
+      } else if (isChinesePunctuation(ch)) {
         // End of a sentence
         buffer.append(ch);
         tokenEnd++;
         break;
-      } else if (atBegin && Utility.SPACES.indexOf(ch) != -1) {
+      } else if (atBegin && isChineseSpace(ch)) {
         tokenStart++;
         tokenEnd++;
         ci = input.read();
@@ -104,8 +99,7 @@
         ci = input.read();
         ch = (char) ci;
         // Two spaces, such as CR, LF
-        if (Utility.SPACES.indexOf(ch) != -1
-            && Utility.SPACES.indexOf(pch) != -1) {
+        if (isChineseSpace(ch) && isChineseSpace(pch)) {
           // buffer.append(ch);
           tokenEnd++;
           break;
@@ -115,7 +109,13 @@
     if (buffer.length() == 0)
       return false;
     else {
-      termAtt.setTermBuffer(buffer.toString());
+      final int length = buffer.length();
+      char termBuffer[] = termAtt.termBuffer();
+      if (termBuffer.length < length)
+        termBuffer = termAtt.resizeTermBuffer(length);
+      
+      buffer.getChars(0, length, termBuffer, 0);
+      termAtt.setTermLength(length);
       offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
       typeAtt.setType("sentence");
       return true;
@@ -131,4 +131,40 @@
     super.reset(input);
     reset();
   }
+  
+  /**
+   * true if <code>ch</code> is any one of 。，！？；,!?;
+   */
+  private boolean isChinesePunctuation(int ch) {
+    switch(ch) {
+      case '。':
+      case '，':
+      case '！':
+      case '？':
+      case '；':
+      case ',':
+      case '!':
+      case '?':
+      case ';':
+        return true;
+      default:
+        return false;
+    }
+  }
+  
+  /**
+   * true if <code>ch</code> is one of {@link Utility#SPACES}
+   */
+  private boolean isChineseSpace(int ch) {
+    switch(ch) {
+      case ' ':
+      case '　':
+      case '\t':
+      case '\r':
+      case '\n':
+        return true;
+      default:
+        return false;
+    }
+  }
 }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordSegmenter.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordSegmenter.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordSegmenter.java	(working copy)
@@ -17,7 +17,6 @@
 
 package org.apache.lucene.analysis.cn.smart;
 
-import java.util.Collections;
 import java.util.List;
 
 import org.apache.lucene.analysis.cn.smart.hhmm.HHMMSegmenter;
@@ -49,15 +48,15 @@
 
     List<SegToken> segTokenList = hhmmSegmenter.process(sentence);
     // tokens from sentence, excluding WordType.SENTENCE_BEGIN and WordType.SENTENCE_END
-    List<SegToken> result = Collections.emptyList();
+    if (segTokenList.size() > 2) { // if its not an empty sentence
+      segTokenList.remove(0);
+      segTokenList.remove(segTokenList.size() - 1);
+    } else segTokenList.clear(); // empty sentence: emit no tokens, not the begin/end markers
     
-    if (segTokenList.size() > 2) // if its not an empty sentence
-      result = segTokenList.subList(1, segTokenList.size() - 1);
+    for (int i = 0; i < segTokenList.size(); i++)
+      convertSegToken(segTokenList.get(i), sentence, startOffset);
     
-    for (SegToken st : result)
-      convertSegToken(st, sentence, startOffset);
-    
-    return result;
+    return segTokenList;
   }
 
   /**
@@ -78,8 +77,8 @@
       case WordType.NUMBER:
       case WordType.FULLWIDTH_NUMBER:
       case WordType.FULLWIDTH_STRING:
-        st.charArray = sentence.substring(st.startOffset, st.endOffset)
-            .toCharArray();
+        st.charArray = new char[st.endOffset - st.startOffset];
+        sentence.getChars(st.startOffset, st.endOffset, st.charArray, 0);
         break;
       default:
         break;
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java	(working copy)
@@ -18,7 +18,6 @@
 package org.apache.lucene.analysis.cn.smart;
 
 import java.io.IOException;
-import java.util.Iterator;
 import java.util.List;
 
 import org.apache.lucene.analysis.TokenFilter;
@@ -40,7 +39,7 @@
 
   private WordSegmenter wordSegmenter;
 
-  private Iterator<SegToken> tokenIter;
+  private int tokenIter;
 
   private List<SegToken> tokenBuffer;
   
@@ -62,18 +61,16 @@
   }
   
   public boolean incrementToken() throws IOException {   
-    if (tokenIter == null || !tokenIter.hasNext()) {
+    if (tokenBuffer == null || tokenIter >= tokenBuffer.size()) {
       // there are no remaining tokens from the current sentence... are there more sentences?
       if (input.incrementToken()) {
         // a new sentence is available: process it.
         tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset());
-        tokenIter = tokenBuffer.iterator();
-        /* 
-         * it should not be possible to have a sentence with 0 words, check just in case.
-         * returning EOS isn't the best either, but its the behavior of the original code.
-         */
-        if (!tokenIter.hasNext())
-          return false;
+        tokenIter = 0;
+        // it should not be possible to have a sentence with 0 words, check just
+        // in case; returning EOS matches the behavior of the original code.
+        if (tokenBuffer.isEmpty())
+          return false;
       } else {
         return false; // no more sentences, end of stream!
       }
@@ -81,7 +74,7 @@
     // WordTokenFilter must clear attributes, as it is creating new tokens.
     clearAttributes();
     // There are remaining tokens from the current sentence, return the next one. 
-    SegToken nextWord = tokenIter.next();
+    SegToken nextWord = tokenBuffer.get(tokenIter++);
     termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
     offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
     typeAtt.setType("word");
@@ -90,6 +83,7 @@
 
   public void reset() throws IOException {
     super.reset();
-    tokenIter = null;
+    tokenIter = 0;
+    tokenBuffer = null;
   }
 }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java	(working copy)
@@ -18,12 +18,12 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.lucene.analysis.cn.smart.Utility;
+import org.apache.lucene.util.ArrayUtil;
 
 /**
  * Graph representing possible token pairs (bigrams) at each start offset in the sentence.
@@ -38,15 +38,22 @@
  */
 class BiSegGraph {
 
-  private Map<Integer,ArrayList<SegTokenPair>> tokenPairListTable = new HashMap<Integer,ArrayList<SegTokenPair>>();
-
+  // TODO: the dense double[][] representation is memory-hungry for long
+  // sentences; replace with a sparser edge structure when revisited.
+  private double[][] table;
+  private int min[]; // minimum viterbi search bounds
+  private int max[]; // maximum viterbi search bounds
+  
+  private int path[];
+  private double pathWeight[];
+  
+  int tableSize;
   private List<SegToken> segTokenList;
 
   private static BigramDictionary bigramDict = BigramDictionary.getInstance();
 
   public BiSegGraph(SegGraph segGraph) {
-    segTokenList = segGraph.makeIndex();
-    generateBiSegGraph(segGraph);
+    reset(segGraph);
   }
 
   /*
@@ -59,19 +66,17 @@
     double oneWordFreq, weight, tinyDouble = 1.0 / Utility.MAX_FREQUENCE;
 
     int next;
-    char[] idBuffer;
-    // get the list of tokens ordered and indexed
-    segTokenList = segGraph.makeIndex();
+
     // Because the beginning position of startToken is -1, therefore startToken can be obtained when key = -1
     int key = -1;
     List<SegToken> nextTokens = null;
     while (key < maxStart) {
-      if (segGraph.isStartExist(key)) {
+      List<SegToken> tokenList = segGraph.getStartList(key);
+      if (!tokenList.isEmpty()) {
 
-        List<SegToken> tokenList = segGraph.getStartList(key);
-
         // Calculate all tokens for a given key.
-        for (SegToken t1 : tokenList) {
+        for (int i = 0; i < tokenList.size(); i++) {
+          SegToken t1 = tokenList.get(i);
           oneWordFreq = t1.weight;
           next = t1.endOffset;
           nextTokens = null;
@@ -80,24 +85,19 @@
           // If we cannot find the next Token, then go to the end and repeat the same cycle.
           while (next <= maxStart) {
             // Because the beginning position of endToken is sentenceLen, so equal to sentenceLen can find endToken.
-            if (segGraph.isStartExist(next)) {
-              nextTokens = segGraph.getStartList(next);
+            nextTokens = segGraph.getStartList(next);
+            if (!nextTokens.isEmpty())
               break;
-            }
             next++;
           }
-          if (nextTokens == null) {
+          if (nextTokens == null || nextTokens.isEmpty()) {
             break;
           }
-          for (SegToken t2 : nextTokens) {
-            idBuffer = new char[t1.charArray.length + t2.charArray.length + 1];
-            System.arraycopy(t1.charArray, 0, idBuffer, 0, t1.charArray.length);
-            idBuffer[t1.charArray.length] = BigramDictionary.WORD_SEGMENT_CHAR;
-            System.arraycopy(t2.charArray, 0, idBuffer,
-                t1.charArray.length + 1, t2.charArray.length);
+          for (int j = 0; j < nextTokens.size(); j++) {
+            SegToken t2 = nextTokens.get(j);
 
             // Two linked Words frequency
-            wordPairFreq = bigramDict.getFrequency(idBuffer);
+            wordPairFreq = bigramDict.getFrequency(t1.charArray, t2.charArray);
 
             // Smoothing
 
@@ -109,9 +109,10 @@
                     + (1.0 - smooth)
                     * ((1.0 - tinyDouble) * wordPairFreq / (1.0 + oneWordFreq) + tinyDouble));
 
-            SegTokenPair tokenPair = new SegTokenPair(idBuffer, t1.index,
-                t2.index, weight);
-            this.addSegTokenPair(tokenPair);
+            // table[to][from] = weight
+            table[t2.index][t1.index] = weight;
+            min[t2.index] = Math.min(min[t2.index], t1.index);
+            max[t2.index] = Math.max(max[t2.index], t1.index);          
           }
         }
       }
@@ -121,115 +122,71 @@
   }
 
   /**
-   * Returns true if their is a list of token pairs at this offset (index of the second token)
-   * 
-   * @param to index of the second token in the token pair
-   * @return true if a token pair exists
-   */
-  public boolean isToExist(int to) {
-    return tokenPairListTable.get(Integer.valueOf(to)) != null;
-  }
-
-  /**
-   * Return a {@link List} of all token pairs at this offset (index of the second token)
-   * 
-   * @param to index of the second token in the token pair
-   * @return {@link List} of token pairs.
-   */
-  public List<SegTokenPair> getToList(int to) {
-    return tokenPairListTable.get(to);
-  }
-
-  /**
-   * Add a {@link SegTokenPair}
-   * 
-   * @param tokenPair {@link SegTokenPair}
-   */
-  public void addSegTokenPair(SegTokenPair tokenPair) {
-    int to = tokenPair.to;
-    if (!isToExist(to)) {
-      ArrayList<SegTokenPair> newlist = new ArrayList<SegTokenPair>();
-      newlist.add(tokenPair);
-      tokenPairListTable.put(to, newlist);
-    } else {
-      List<SegTokenPair> tokenPairList = tokenPairListTable.get(to);
-      tokenPairList.add(tokenPair);
-    }
-  }
-
-  /**
-   * Get the number of {@link SegTokenPair} entries in the table.
-   * @return number of {@link SegTokenPair} entries
-   */
-  public int getToCount() {
-    return tokenPairListTable.size();
-  }
-
-  /**
    * Find the shortest path with the Viterbi algorithm.
    * @return {@link List}
    */
   public List<SegToken> getShortPath() {
     int current;
-    int nodeCount = getToCount();
-    List<PathNode> path = new ArrayList<PathNode>();
-    PathNode zeroPath = new PathNode();
-    zeroPath.weight = 0;
-    zeroPath.preNode = 0;
-    path.add(zeroPath);
+    int nodeCount = tableSize - 1;
+
     for (current = 1; current <= nodeCount; current++) {
       double weight;
-      List<SegTokenPair> edges = getToList(current);
+      double edges[] = table[current];
 
       double minWeight = Double.MAX_VALUE;
-      SegTokenPair minEdge = null;
-      for (SegTokenPair edge : edges) {
-        weight = edge.weight;
-        PathNode preNode = path.get(edge.from);
-        if (preNode.weight + weight < minWeight) {
-          minWeight = preNode.weight + weight;
-          minEdge = edge;
+      int minEdge = 0;
+      for (int i = min[current]; i <= max[current]; i++) {
+        weight = edges[i];
+        if (weight != 0) {
+          double preNodeWeight = pathWeight[i];
+          if (preNodeWeight + weight < minWeight) {
+            minWeight = preNodeWeight + weight;
+            minEdge = i;
+          }
         }
       }
-      PathNode newNode = new PathNode();
-      newNode.weight = minWeight;
-      newNode.preNode = minEdge.from;
-      path.add(newNode);
+      path[current] = minEdge;
+      pathWeight[current] = minWeight;
     }
 
-    // Calculate PathNodes
-    int preNode, lastNode;
-    lastNode = path.size() - 1;
-    current = lastNode;
-    List<Integer> rpath = new ArrayList<Integer>();
     List<SegToken> resultPath = new ArrayList<SegToken>();
-
-    rpath.add(current);
+    current = current - 1;
+    
+    resultPath.add(segTokenList.get(current));
     while (current != 0) {
-      PathNode currentPathNode = (PathNode) path.get(current);
-      preNode = currentPathNode.preNode;
-      rpath.add(Integer.valueOf(preNode));
+      int preNode = path[current];
+      resultPath.add(segTokenList.get(preNode));
       current = preNode;
     }
-    for (int j = rpath.size() - 1; j >= 0; j--) {
-      Integer idInteger = (Integer) rpath.get(j);
-      int id = idInteger.intValue();
-      SegToken t = segTokenList.get(id);
-      resultPath.add(t);
-    }
+    
+    Collections.reverse(resultPath);
     return resultPath;
-
   }
-
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    Collection<ArrayList<SegTokenPair>>  values = tokenPairListTable.values();
-    for (ArrayList<SegTokenPair> segList : values) {
-      for (SegTokenPair pair : segList) {
-        sb.append(pair + "\n");
-      }
+  
+  void reset(SegGraph segGraph) {
+    // get the list of tokens ordered and indexed
+    segTokenList = segGraph.makeIndex();
+    SegToken lastToken = segTokenList.get(segTokenList.size() - 1);
+    tableSize = lastToken.index + 1;
+    if (table == null || tableSize > table.length) {
+      int newSize = ArrayUtil.getNextSize(tableSize);
+      table = new double[newSize][];
+      for (int i = 0; i < newSize; i++)
+        table[i] = new double[newSize];
+      min = new int[newSize];
+      Arrays.fill(min, 0, tableSize, Integer.MAX_VALUE);
+      max = new int[newSize];
+      path = new int[newSize];
+      pathWeight = new double[newSize];
+    } else {
+      for (int i = 0; i < tableSize; i++)
+        Arrays.fill(table[i], 0, tableSize, 0);
+      Arrays.fill(min, 0, tableSize, Integer.MAX_VALUE);
+      Arrays.fill(max, 0, tableSize, 0);
+      Arrays.fill(path, 0, tableSize, 0);
+      Arrays.fill(pathWeight, 0, tableSize, 0);
     }
-    return sb.toString();
+    generateBiSegGraph(segGraph);
   }
 
 }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java	(working copy)
@@ -60,8 +60,6 @@
 
   private int max = 0;
 
-  private int repeat = 0;
-
   // static Logger log = Logger.getLogger(BigramDictionary.class);
 
   public synchronized static BigramDictionary getInstance() {
@@ -236,26 +234,79 @@
     } else
       return -1;
   }
+ 
+  /**
+   * 32-bit FNV Hash Function (on bigrams)
+   * 
+   * @param left left side of bigram
+   * @param right right side of bigram
+   * @return hashcode
+   */
+  private long hash1(char left[], char right[]) {
+    final long p = 1099511628211L;
+    long hash = 0xcbf29ce484222325L;
+    for (int i = 0; i < left.length; i++) {
+      char d = left[i];
+      hash = (hash ^ (d & 0x00FF)) * p;
+      hash = (hash ^ (d >> 8)) * p;
+    }
+    
+    hash = (hash ^ (BigramDictionary.WORD_SEGMENT_CHAR & 0x00FF)) * p;
+    hash = (hash ^ (BigramDictionary.WORD_SEGMENT_CHAR >> 8)) * p;
+    
+    for (int i = 0; i < right.length; i++) {
+      char d = right[i];
+      hash = (hash ^ (d & 0x00FF)) * p;
+      hash = (hash ^ (d >> 8)) * p;
+    }
+    return hash;
+  }
+  
+  /**
+   * djb2 hash algorithm (k=33) (on bigrams)
+   * 
+   * @param left left side of bigram
+   * @param right right side of bigram
+   * @return hashcode
+   */
+  private int hash2(char left[], char right[]) {
+    int hash = 5381;
 
+    /* hash 33 + c; note '&'/'>>' bind looser than '+' here — presumably this quirk must match hash2(char[]) used at table-build time, verify before changing */
+    for (int i = 0; i < left.length; i++) {
+      char d = left[i];
+      hash = ((hash << 5) + hash) + d & 0x00FF;
+      hash = ((hash << 5) + hash) + d >> 8;
+    }
+    
+    hash = ((hash << 5) + hash) + BigramDictionary.WORD_SEGMENT_CHAR & 0x00FF;
+    hash = ((hash << 5) + hash) + BigramDictionary.WORD_SEGMENT_CHAR >> 8;
+    
+    for (int i = 0; i < right.length; i++) {
+      char d = right[i];
+      hash = ((hash << 5) + hash) + d & 0x00FF;
+      hash = ((hash << 5) + hash) + d >> 8;
+    }
+    return hash;
+  }
+  
   /*
-   * lookup the index into the frequency array.
+   * lookup the index into the frequency array for a bigram
    */
-  private int getBigramItemIndex(char carray[]) {
-    long hashId = hash1(carray);
+  private int getBigramItemIndex(char left[], char right[]) {
+    long hashId = hash1(left, right);
     int hash1 = (int) (hashId % PRIME_BIGRAM_LENGTH);
-    int hash2 = hash2(carray) % PRIME_BIGRAM_LENGTH;
+    int hash2 = hash2(left, right) % PRIME_BIGRAM_LENGTH;
     if (hash1 < 0)
       hash1 = PRIME_BIGRAM_LENGTH + hash1;
     if (hash2 < 0)
       hash2 = PRIME_BIGRAM_LENGTH + hash2;
     int index = hash1;
     int i = 1;
-    repeat++;
     while (bigramHashTable[index] != 0 && bigramHashTable[index] != hashId
         && i < PRIME_BIGRAM_LENGTH) {
       index = (hash1 + i * hash2) % PRIME_BIGRAM_LENGTH;
       i++;
-      repeat++;
       if (i > max)
         max = i;
     }
@@ -266,12 +317,14 @@
     } else
       return -1;
   }
-
-  public int getFrequency(char[] carray) {
-    int index = getBigramItemIndex(carray);
+  
+  /**
+   * get the frequency of a bigram.
+   */
+  final int getFrequency(char left[], char right[]) {
+    int index = getBigramItemIndex(left, right);
     if (index != -1)
       return frequencyTable[index];
     return 0;
   }
-
 }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java	(working copy)
@@ -35,6 +35,21 @@
 public class HHMMSegmenter {
 
   private static WordDictionary wordDict = WordDictionary.getInstance();
+  
+  // static frequencies for constant marker tokens
+  private static final int STRING_FREQUENCY = 
+    wordDict.getFrequency(Utility.STRING_CHAR_ARRAY);
+  private static final int NUMBER_FREQUENCY =
+    wordDict.getFrequency(Utility.NUMBER_CHAR_ARRAY);
+  private static final int SENTENCE_START_FREQUENCY = 
+    wordDict.getFrequency(Utility.START_CHAR_ARRAY);
+  private static final int SENTENCE_END_FREQUENCY =
+    wordDict.getFrequency(Utility.END_CHAR_ARRAY);
+  
+  // reusable word graph; this reuse makes the segmenter stateful and not thread-safe
+  private SegGraph segGraph = new SegGraph(10);
+  // reusable bigram graph
+  private BiSegGraph biSegGraph;
 
   /**
    * Create the {@link SegGraph} for a sentence.
@@ -54,7 +69,7 @@
     int wordType;
     char[] charArray;
 
-    SegGraph segGraph = new SegGraph();
+    segGraph.reset(length);
     while (i < length) {
       hasFullWidth = false;
       switch (charTypeArray[i]) {
@@ -63,7 +78,7 @@
           break;
         case CharType.HANZI:
           j = i + 1;
-          wordBuf.delete(0, wordBuf.length());
+          wordBuf.setLength(0);
           // It doesn't matter if a single Chinese character (Hanzi) can form a phrase or not, 
           // it will store that single Chinese character (Hanzi) in the SegGraph.  Otherwise, it will 
           // cause word division.
@@ -115,7 +130,7 @@
           }
           // Found a Token from i to j. Type is LETTER char string.
           charArray = Utility.STRING_CHAR_ARRAY;
-          frequency = wordDict.getFrequency(charArray);
+          frequency = STRING_FREQUENCY;
           wordType = hasFullWidth ? WordType.FULLWIDTH_STRING : WordType.STRING;
           token = new SegToken(charArray, i, j, wordType, frequency);
           segGraph.addToken(token);
@@ -133,7 +148,7 @@
           }
           // Found a Token from i to j. Type is NUMBER char string.
           charArray = Utility.NUMBER_CHAR_ARRAY;
-          frequency = wordDict.getFrequency(charArray);
+          frequency = NUMBER_FREQUENCY;
           wordType = hasFullWidth ? WordType.FULLWIDTH_NUMBER : WordType.NUMBER;
           token = new SegToken(charArray, i, j, wordType, frequency);
           segGraph.addToken(token);
@@ -153,7 +168,7 @@
           // Treat the unrecognized char symbol as unknown string.
           // For example, any symbol not in GB2312 is treated as one of these.
           charArray = Utility.STRING_CHAR_ARRAY;
-          frequency = wordDict.getFrequency(charArray);
+          frequency = STRING_FREQUENCY;
           token = new SegToken(charArray, i, j, WordType.STRING, frequency);
           segGraph.addToken(token);
           i = j;
@@ -163,13 +178,13 @@
 
     // Add two more Tokens: "beginning xx beginning"
     charArray = Utility.START_CHAR_ARRAY;
-    frequency = wordDict.getFrequency(charArray);
+    frequency = SENTENCE_START_FREQUENCY;
     token = new SegToken(charArray, -1, 0, WordType.SENTENCE_BEGIN, frequency);
     segGraph.addToken(token);
 
     // "end xx end"
     charArray = Utility.END_CHAR_ARRAY;
-    frequency = wordDict.getFrequency(charArray);
+    frequency = SENTENCE_END_FREQUENCY;
     token = new SegToken(charArray, length, length + 1, WordType.SENTENCE_END,
         frequency);
     segGraph.addToken(token);
@@ -202,7 +217,10 @@
    */
   public List<SegToken> process(String sentence) {
     SegGraph segGraph = createSegGraph(sentence);
-    BiSegGraph biSegGraph = new BiSegGraph(segGraph);
+    if (biSegGraph == null)
+      biSegGraph = new BiSegGraph(segGraph);
+    else
+      biSegGraph.reset(segGraph);
     List<SegToken> shortPath = biSegGraph.getShortPath();
     return shortPath;
   }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/PathNode.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/PathNode.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/PathNode.java	(working copy)
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis.cn.smart.hhmm;
-
-/**
- * SmartChineseAnalyzer internal node representation
- * <p>
- * Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm.
- * </p>
- * <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental. 
- * The APIs and file formats introduced here might change in the future and will not be 
- * supported anymore in such a case.</font>
- * </p>
- */
-class PathNode implements Comparable<PathNode> {
-  public double weight;
-
-  public int preNode;
-
-  public int compareTo(PathNode pn) {
-    if (weight < pn.weight)
-      return -1;
-    else if (weight == pn.weight)
-      return 0;
-    else
-      return 1;
-  }
-
-  /**
-   * @see java.lang.Object#hashCode()
-   */
-  public int hashCode() {
-    final int prime = 31;
-    int result = 1;
-    result = prime * result + preNode;
-    long temp;
-    temp = Double.doubleToLongBits(weight);
-    result = prime * result + (int) (temp ^ (temp >>> 32));
-    return result;
-  }
-
-  /**
-   * @see java.lang.Object#equals(java.lang.Object)
-   */
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    PathNode other = (PathNode) obj;
-    if (preNode != other.preNode)
-      return false;
-    if (Double.doubleToLongBits(weight) != Double
-        .doubleToLongBits(other.weight))
-      return false;
-    return true;
-  }
-}
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java	(working copy)
@@ -18,9 +18,7 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 /**
  * Graph representing possible tokens at each start offset in the sentence.
@@ -37,19 +35,17 @@
 
   /**
    * Map of start offsets to ArrayList of tokens at that position
+   * Note: offset i is stored as array index i + 1.
+   * This is because the sentence start token is stored at position -1
    */
-  private Map<Integer,ArrayList<SegToken>> tokenListTable = new HashMap<Integer,ArrayList<SegToken>>();
+  private final TokenGraph<SegToken> table;
 
-  private int maxStart = -1;
-
   /**
-   * Returns true if a mapping for the specified start offset exists
-   * 
-   * @param s startOffset
-   * @return true if there are tokens for the startOffset
+   * Create a new SegGraph capable of representing a sentence of maxSize.
    */
-  public boolean isStartExist(int s) {
-    return tokenListTable.get(s) != null;
+  SegGraph(int maxSize) {
+    // sentence size, plus sentence start marker, plus sentence end marker
+    table = new TokenGraph<SegToken>(maxSize + 2);
   }
 
   /**
@@ -59,7 +55,7 @@
    * @return List of tokens at the specified start offset.
    */
   public List<SegToken> getStartList(int s) {
-    return tokenListTable.get(s);
+    return table.get(s + 1);
   }
 
   /**
@@ -68,7 +64,7 @@
    * @return maximum start offset, or -1 if the map is empty.
    */
   public int getMaxStart() {
-    return maxStart;
+    return table.getMax() - 1;
   }
 
   /**
@@ -77,13 +73,14 @@
    */
   public List<SegToken> makeIndex() {
     List<SegToken> result = new ArrayList<SegToken>();
-    int s = -1, count = 0, size = tokenListTable.size();
+    int s = -1, count = 0, size = table.size();
     List<SegToken> tokenList;
     short index = 0;
     while (count < size) {
-      if (isStartExist(s)) {
-        tokenList = tokenListTable.get(s);
-        for (SegToken st : tokenList) {
+      tokenList = getStartList(s);
+      if (!tokenList.isEmpty()) {
+        for (int i = 0; i < tokenList.size(); i++) {
+          SegToken st = tokenList.get(i);
           st.index = index;
           result.add(st);
           index++;
@@ -92,6 +89,7 @@
       }
       s++;
     }
+    
     return result;
   }
 
@@ -100,48 +98,19 @@
    * @param token {@link SegToken}
    */
   public void addToken(SegToken token) {
-    int s = token.startOffset;
-    if (!isStartExist(s)) {
-      ArrayList<SegToken> newlist = new ArrayList<SegToken>();
-      newlist.add(token);
-      tokenListTable.put(s, newlist);
-    } else {
-      List<SegToken> tokenList = tokenListTable.get(s);
-      tokenList.add(token);
-    }
-    if (s > maxStart)
-      maxStart = s;
+    table.add(token.startOffset + 1, token);
   }
 
-  /**
-   * Return a {@link List} of all tokens in the map, ordered by startOffset.
-   * 
-   * @return {@link List} of all tokens in the map.
-   */
-  public List<SegToken> toTokenList() {
-    List<SegToken> result = new ArrayList<SegToken>();
-    int s = -1, count = 0, size = tokenListTable.size();
-    List<SegToken> tokenList;
-
-    while (count < size) {
-      if (isStartExist(s)) {
-        tokenList = tokenListTable.get(s);
-        for (SegToken st : tokenList) {
-          result.add(st);
-        }
-        count++;
-      }
-      s++;
-    }
-    return result;
-  }
-
   public String toString() {
-    List<SegToken> tokenList = this.toTokenList();
+    List<SegToken> tokenList = makeIndex();
     StringBuilder sb = new StringBuilder();
     for (SegToken t : tokenList) {
       sb.append(t + "\n");
     }
     return sb.toString();
   }
+  
+  void reset(int size) {
+    table.reset(size + 2);
+  }
 }
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegToken.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegToken.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegToken.java	(working copy)
@@ -36,12 +36,12 @@
   public char[] charArray;
 
   /**
-   * start offset into {@link #charArray}
+   * start offset into original sentence
    */
   public int startOffset;
 
   /**
-   * end offset into {@link #charArray}
+   * end offset into original sentence
    */
   public int endOffset;
 
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegTokenPair.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegTokenPair.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegTokenPair.java	(working copy)
@@ -1,93 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis.cn.smart.hhmm;
-
-import java.util.Arrays;
-
-/**
- * A pair of tokens in {@link SegGraph}
- * <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental. 
- * The APIs and file formats introduced here might change in the future and will not be 
- * supported anymore in such a case.</font>
- * </p>
- */
-class SegTokenPair {
-
-  public char[] charArray;
-
-  /**
-   * index of the first token in {@link SegGraph}
-   */
-  public int from;
-
-  /**
-   * index of the second token in {@link SegGraph}
-   */
-  public int to;
-
-  public double weight;
-
-  public SegTokenPair(char[] idArray, int from, int to, double weight) {
-    this.charArray = idArray;
-    this.from = from;
-    this.to = to;
-    this.weight = weight;
-  }
-
-  /**
-   * @see java.lang.Object#hashCode()
-   */
-  public int hashCode() {
-    final int prime = 31;
-    int result = 1;
-    for(int i=0;i<charArray.length;i++) {
-      result = prime * result + charArray[i];
-    }
-    result = prime * result + from;
-    result = prime * result + to;
-    long temp;
-    temp = Double.doubleToLongBits(weight);
-    result = prime * result + (int) (temp ^ (temp >>> 32));
-    return result;
-  }
-
-  /**
-   * @see java.lang.Object#equals(java.lang.Object)
-   */
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    SegTokenPair other = (SegTokenPair) obj;
-    if (!Arrays.equals(charArray, other.charArray))
-      return false;
-    if (from != other.from)
-      return false;
-    if (to != other.to)
-      return false;
-    if (Double.doubleToLongBits(weight) != Double
-        .doubleToLongBits(other.weight))
-      return false;
-    return true;
-  }
-
-}
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/TokenGraph.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/TokenGraph.java	(revision 0)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/TokenGraph.java	(revision 0)
@@ -0,0 +1,95 @@
+package org.apache.lucene.analysis.cn.smart.hhmm;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A generic graph of items indexed by position.
+ */
+final class TokenGraph<E> {
+  private List<ArrayList<E>> table = new ArrayList<ArrayList<E>>();
+  private int size = 0;
+  private int max = 0;
+  
+  /**
+   * Create a new TokenGraph capable of holding <code>size</code> positions.
+   */
+  TokenGraph(int size) {
+    reset(size);
+  }
+  
+  /**
+   * Add an <code>item</code> at the specified <code>position</code>
+   */
+  void add(int position, E item) {
+    List<E> list = table.get(position);
+    if (list.isEmpty())
+      size++;
+    list.add(item);
+    max = Math.max(max, position);
+  }
+  
+  /**
+   * Get the list of items at the specified <code>position</code>
+   * @return {@link List} of items at this position. 
+   * The list will be empty if no mappings exist.
+   */
+  List<E> get(int position) {
+    return table.get(position);
+  }
+  
+  /**
+   * True if items exist at the specified <code>position</code>
+   */
+  boolean isExists(int position) {
+    return !table.get(position).isEmpty();
+  }
+  
+  /**
+   * Get the number of positions in this graph that contain at least one item.
+   */
+  int size() {
+    return size;
+  }
+  
+  /**
+   * Get the maximum position that has a mapping in this graph.
+   */
+  int getMax() {
+    return max;
+  }
+  
+  /**
+   * Clear the graph, resetting it to a new empty graph of <code>size</code> positions.
+   */
+  void reset(int size) {
+    if (table.size() >= size) {
+      for (int i = 0; i < size; i++)
+        table.get(i).clear();
+    } else {
+      for (int i = 0; i < table.size(); i++)
+        table.get(i).clear();
+      for (int i = table.size(); i < size; i++)
+        table.add(new ArrayList<E>());
+    }
+    this.size = 0;
+    this.max = 0;
+  }
+}

Property changes on: contrib\analyzers\smartcn\src\java\org\apache\lucene\analysis\cn\smart\hhmm\TokenGraph.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
===================================================================
--- contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java	(revision 831792)
+++ contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java	(working copy)
@@ -94,6 +94,14 @@
     assertAnalyzesTo(ca, sentence, result, startOffsets, endOffsets, posIncr);
   }
   
+  /*
+   * Test analyzing an empty document
+   */
+  public void testEmpty() throws Exception {
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
+    assertAnalyzesTo(ca, "", new String[] {});
+  }
+  
   public void testChineseAnalyzer() throws Exception {
     Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true);
     String sentence = "我购买了道具和服装。";
