Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/AbstractDictionary.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/AbstractDictionary.java	(revision 816218)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/AbstractDictionary.java	(working copy)
@@ -117,8 +117,9 @@
         // Should be a two-byte character
         return -1;
       }
-      int b0 = (int) (buffer[0] & 0x0FF) - 161; // 编码从A1开始，因此减去0xA1=161
-      int b1 = (int) (buffer[1] & 0x0FF) - 161; // 第一个字符和最后一个字符没有汉字，因此每个区只收16*6-2=94个汉字
+      int b0 = (int) (buffer[0] & 0x0FF) - 161; // Code starts from A1, therefore subtract 0xA1=161
+      int b1 = (int) (buffer[1] & 0x0FF) - 161; // There is no Chinese char for the first and last symbol. 
+      											// Therefore, each code page only has 16*6-2=94 characters.
       return (short) (b0 * 94 + b1);
     } catch (UnsupportedEncodingException e) {
       e.printStackTrace();
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java	(revision 816218)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java	(working copy)
@@ -63,7 +63,7 @@
     char[] idBuffer;
     // get the list of tokens ordered and indexed
     segTokenList = segGraph.makeIndex();
-    // 因为startToken（"始##始"）的起始位置是-1因此key为-1时可以取出startToken
+    // The start position of startToken is -1, so startToken can be obtained when key = -1
     int key = -1;
     List nextTokens = null;
     while (key < maxStart) {
@@ -71,16 +71,17 @@
 
         List tokenList = segGraph.getStartList(key);
 
-        // 为某一个key对应的所有Token都计算一次
+        // Calculate all tokens for a given key.
         for (Iterator iter = tokenList.iterator(); iter.hasNext();) {
           SegToken t1 = (SegToken) iter.next();
           oneWordFreq = t1.weight;
           next = t1.endOffset;
           nextTokens = null;
-          // 找到下一个对应的Token，例如“阳光海岸”，当前Token是“阳光”， 下一个Token可以是“海”或者“海岸”
-          // 如果找不到下一个Token，则说明到了末尾，重新循环。
+          // Find the next corresponding Token.
+          // For example: in "Sunny seashore", if the current Token is "sunny", the next one can be "sea" or "seashore".
+          // If the next Token cannot be found, we have reached the end, so start the loop again.
           while (next <= maxStart) {
-            // 因为endToken的起始位置是sentenceLen，因此等于sentenceLen是可以找到endToken
+            // Because endToken starts at position sentenceLen, endToken can be found when next equals sentenceLen.
             if (segGraph.isStartExist(next)) {
               nextTokens = segGraph.getStartList(next);
               break;
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java	(revision 816218)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java	(working copy)
@@ -156,7 +156,8 @@
       IOException, UnsupportedEncodingException {
 
     int i, cnt, length, total = 0;
-    // 文件中只统计了6763个汉字加5个空汉字符3756~3760，其中第3756个用来存储符号信息。
+    // The file only counts 6763 Chinese characters plus 5 reserved slots 3756~3760.
+    // The 3756th slot is used to store symbol (punctuation) information.
     int[] buffer = new int[3];
     byte[] intBuffer = new byte[4];
     String tmpword;
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java	(revision 816218)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/HHMMSegmenter.java	(working copy)
@@ -49,7 +49,7 @@
     int[] charTypeArray = getCharTypes(sentence);
     StringBuffer wordBuf = new StringBuffer();
     SegToken token;
-    int frequency = 0; // word的出现次数
+    int frequency = 0; // the number of times word appears.
     boolean hasFullWidth;
     int wordType;
     char[] charArray;
@@ -64,7 +64,9 @@
         case CharType.HANZI:
           j = i + 1;
           wordBuf.delete(0, wordBuf.length());
-          // 不管单个汉字能不能构成词，都将单个汉字存到segGraph中去，否则会造成分此图断字
+          // Whether or not a single Chinese character (Hanzi) can form a word by itself,
+          // always store that single character in the segGraph. Otherwise, the
+          // segmentation graph would be broken at that character.
           wordBuf.append(sentence.charAt(i));
           charArray = new char[] { sentence.charAt(i) };
           frequency = wordDict.getFrequency(charArray);
@@ -75,7 +77,8 @@
           foundIndex = wordDict.getPrefixMatch(charArray);
           while (j <= length && foundIndex != -1) {
             if (wordDict.isEqual(charArray, foundIndex) && charArray.length > 1) {
-              // 就是我们要找的词， 也就是说找到了从i到j的一个成词SegToken，并且不是单字词
+              // This is the word we are looking for; in other words, we have found a word SegToken
+              // from i to j, and it is not a single-character word.
               frequency = wordDict.getFrequency(charArray);
               token = new SegToken(charArray, i, j, WordType.CHINESE_WORD,
                   frequency);
@@ -89,9 +92,9 @@
               wordBuf.append(sentence.charAt(j));
               charArray = new char[wordBuf.length()];
               wordBuf.getChars(0, charArray.length, charArray, 0);
-              // idArray作为前缀已经找到过(foundWordIndex!=-1),
-              // 因此加长过后的idArray只可能出现在foundWordIndex以后,
-              // 故从foundWordIndex之后开始查找
+              // charArray has already been found as a prefix (foundIndex != -1).
+              // Therefore, the lengthened charArray can only appear after foundIndex,
+              // so start searching after foundIndex.
               foundIndex = wordDict.getPrefixMatch(charArray, foundIndex);
               j++;
             } else {
@@ -110,7 +113,7 @@
               hasFullWidth = true;
             j++;
           }
-          // 找到了从i到j的一个Token，类型为LETTER的字符串
+          // Found a Token from i to j. Type is LETTER char string.
           charArray = Utility.STRING_CHAR_ARRAY;
           frequency = wordDict.getFrequency(charArray);
           wordType = hasFullWidth ? WordType.FULLWIDTH_STRING : WordType.STRING;
@@ -128,7 +131,7 @@
               hasFullWidth = true;
             j++;
           }
-          // 找到了从i到j的一个Token，类型为NUMBER的字符串
+          // Found a Token from i to j. Type is NUMBER char string.
           charArray = Utility.NUMBER_CHAR_ARRAY;
           frequency = wordDict.getFrequency(charArray);
           wordType = hasFullWidth ? WordType.FULLWIDTH_NUMBER : WordType.NUMBER;
@@ -138,7 +141,7 @@
           break;
         case CharType.DELIMITER:
           j = i + 1;
-          // 标点符号的weight不用查了，选个最大的频率即可
+          // No need to search the weight for the punctuation.  Picking the highest frequency will work.
           frequency = Utility.MAX_FREQUENCE;
           charArray = new char[] { sentence.charAt(i) };
           token = new SegToken(charArray, i, j, WordType.DELIMITER, frequency);
@@ -147,7 +150,8 @@
           break;
         default:
           j = i + 1;
-          // 把不认识的字符当作未知串看待，例如GB2312编码之外的字符，每个字符当作一个
+          // Treat unrecognized characters as unknown strings.
+          // For example, each character outside GB2312 is treated as one such string.
           charArray = Utility.STRING_CHAR_ARRAY;
           frequency = wordDict.getFrequency(charArray);
           token = new SegToken(charArray, i, j, WordType.STRING, frequency);
@@ -157,13 +161,13 @@
       }
     }
 
-    // 为segGraph增加两个新Token： "始##始","末##末"
+    // Add two more Tokens to segGraph: sentence start (START_CHAR_ARRAY) and sentence end (END_CHAR_ARRAY)
     charArray = Utility.START_CHAR_ARRAY;
     frequency = wordDict.getFrequency(charArray);
     token = new SegToken(charArray, -1, 0, WordType.SENTENCE_BEGIN, frequency);
     segGraph.addToken(token);
 
-    // "末##末"
+    // "end xx end"
     charArray = Utility.END_CHAR_ARRAY;
     frequency = wordDict.getFrequency(charArray);
     token = new SegToken(charArray, length, length + 1, WordType.SENTENCE_END,
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java	(revision 816218)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java	(working copy)
@@ -55,19 +55,24 @@
   public static final int PRIME_INDEX_LENGTH = 12071;
 
   /**
-   * wordIndexTable保证将Unicode中的所有汉字编码hash到PRIME_INDEX_LENGTH长度的数组中，
-   * 当然会有冲突，但实际上本程序只处理GB2312字符部分，6768个字符加上一些ASCII字符，
-   * 因此对这些字符是有效的，为了保证比较的准确性，保留原来的字符在charIndexTable中以确定查找的准确性
+   * wordIndexTable hashes every Chinese character in Unicode into an array of
+   * length PRIME_INDEX_LENGTH. Collisions are possible, but in practice this
+   * program only handles the 6768 characters found in GB2312 plus some
+   * ASCII characters, so the scheme works for them. To keep comparisons accurate,
+   * the original character is retained in charIndexTable to verify each lookup.
    */
   private short[] wordIndexTable;
 
   private char[] charIndexTable;
 
   /**
-   * 存储所有词库的真正数据结构，为了避免占用空间太多，用了两个单独的多维数组来存储词组和频率。
-   * 每个词放在一个char[]中，每个char对应一个汉字或其他字符，每个频率放在一个int中，
-   * 这两个数组的前两个下表是一一对应的。因此可以利用wordItem_charArrayTable[i][j]来查词，
-   * 用wordItem_frequencyTable[i][j]来查询对应的频率
+   * The actual data structure that stores the whole lexicon. To avoid taking
+   * too much space, two separate multidimensional arrays store the words and
+   * their frequencies. Each word is placed in a char[], where each char is a
+   * Chinese character or other symbol; each frequency is stored in an int.
+   * The first two indices of the two arrays correspond one-to-one. Therefore,
+   * wordItem_charArrayTable[i][j] can be used to look up a word, and
+   * wordItem_frequencyTable[i][j] to look up the corresponding frequency.
    */
   private char[][][] wordItem_charArrayTable;
 
@@ -193,7 +198,8 @@
   private int loadMainDataFromFile(String dctFilePath)
       throws FileNotFoundException, IOException, UnsupportedEncodingException {
     int i, cnt, length, total = 0;
-    // 文件中只统计了6763个汉字加5个空汉字符3756~3760，其中第3756个用来存储符号信息。
+    // The file only counts 6763 Chinese characters plus 5 reserved slots 3756~3760.
+    // The 3756th slot is used to store symbol (punctuation) information.
     int[] buffer = new int[3];
     byte[] intBuffer = new byte[4];
     String tmpword;
@@ -255,33 +261,37 @@
   }
 
   /**
-   * 原词库将所有标点符号的信息合并到一个列表里(从1开始的3755处)。这里将其展开，分别放到各个符号对应的列表中
+   * The original lexicon merges the information for all punctuation into a
+   * single list (at position 3755, counting from 1). Here that list is expanded,
+   * and each entry is placed into the list of its corresponding symbol.
    */
   private void expandDelimiterData() {
     int i;
     int cnt;
-    // 标点符号在从1开始的3755处，将原始的标点符号对应的字典分配到对应的标点符号中
+    // Punctuation is stored at position 3755 (counting from 1); distribute the
+    // original punctuation dictionary entries to their corresponding punctuation marks.
     int delimiterIndex = 3755 + GB2312_FIRST_CHAR;
     i = 0;
     while (i < wordItem_charArrayTable[delimiterIndex].length) {
       char c = wordItem_charArrayTable[delimiterIndex][i][0];
-      int j = getGB2312Id(c);// 该标点符号应该所在的index值
+      int j = getGB2312Id(c);// the id value of the punctuation
       if (wordItem_charArrayTable[j] == null) {
 
         int k = i;
-        // 从i开始计数后面以j开头的符号的worditem的个数
+        // Starting from i, count the following wordItems that begin with the symbol whose id is j
         while (k < wordItem_charArrayTable[delimiterIndex].length
             && wordItem_charArrayTable[delimiterIndex][k][0] == c) {
           k++;
         }
-        // 此时k-i为id为j的标点符号对应的wordItem的个数
+        // At this point, k-i is the number of wordItems that correspond to
+        // the punctuation character c, whose id is j.
         cnt = k - i;
         if (cnt != 0) {
           wordItem_charArrayTable[j] = new char[cnt][];
           wordItem_frequencyTable[j] = new int[cnt];
         }
 
-        // 为每一个wordItem赋值
+        // Assign value for each wordItem.
         for (k = 0; k < cnt; k++, i++) {
           // wordItemTable[j][k] = new WordItem();
           wordItem_frequencyTable[j][k] = wordItem_frequencyTable[delimiterIndex][i];
@@ -293,7 +303,7 @@
         setTableIndex(c, j);
       }
     }
-    // 将原符号对应的数组删除
+    // Delete the original corresponding symbol array.
     wordItem_charArrayTable[delimiterIndex] = null;
     wordItem_frequencyTable[delimiterIndex] = null;
   }
@@ -362,8 +372,8 @@
   }
 
   /*
-   * 计算字符c在哈希表中应该在的位置，然后将地址列表中该位置的值初始化
-   * 
+   * Calculate character c's position in hash table, 
+   * then initialize the value of that position in the address table.
    */
   private boolean setTableIndex(char c, int j) {
     int index = getAvaliableTableIndex(c);
@@ -420,12 +430,14 @@
   }
 
   /**
-   * 在字典库中查找单词对应的char数组为charArray的字符串。返回该单词在单词序列中的位置
+   * Look up in the dictionary the word whose char array is charArray,
+   * and return the position of that word in the word list.
    * 
-   * @param knownHashIndex 已知单词第一个字符charArray[0]在hash表中的位置，如果未计算，可以用函数int
-   *        findInTable(char[] charArray) 代替
-   * @param charArray 查找单词对应的char数组
-   * @return 单词在单词数组中的位置，如果没找到则返回-1
+   * @param knownHashIndex the already-known position of the word's first
+   *   character charArray[0] in the hash table. If it has not been calculated,
+   *   use the function int findInTable(char[] charArray) instead.
+   * @param charArray the char array of the word to look up.
+   * @return the position of the word in the word array, or -1 if not found.
    */
   private int findInTable(short knownHashIndex, char[] charArray) {
     if (charArray == null || charArray.length == 0)
@@ -488,7 +500,7 @@
             && Utility.compareArrayByPrefix(charArray, 1, items[mid], 0) == 0)
           mid--;
         mid++;
-        return mid;// 找到第一个以charArray为前缀的单词
+        return mid;// Find the first word that uses charArray as prefix.
       } else if (cmpResult < 0)
         end = mid - 1;
       else
