NOTE(review): this patch had been collapsed onto a single line; the line
structure below was restored to match the hunk headers (6 -> 7 lines and
17 -> 21 lines). Intra-line whitespace was reconstructed, so if a context
line fails to match, apply with whitespace-insensitive matching (patch -l).

What the patch changes, as shown by the hunks:
  * adds an `increment` field, initialised to 1;
  * replaces the "reset pos to 0 and grow gramSize" step: when
    pos+gramSize > inLen or gramSize > maxGram, pos is advanced by one and
    gramSize is reset to minGram;
  * returns null once pos+minGram > inLen;
  * each returned Token gets setPositionIncrement(increment); increment is
    set to 0 after every token and back to 1 whenever pos advances.

Index: contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java	(revision 636258)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java	(working copy)
@@ -34,6 +34,7 @@
   private int minGram, maxGram;
   private int gramSize;
   private int pos = 0;
+  private int increment = 1;
   private int inLen;
   private String inStr;
   private boolean started = false;
@@ -74,17 +75,21 @@
       inLen = inStr.length();
     }
 
-    if (pos+gramSize > inLen) {            // if we hit the end of the string
-      pos = 0;                           // reset to beginning of string
-      gramSize++;                        // increase n-gram size
-      if (gramSize > maxGram)            // we are done
-        return null;
-      if (pos+gramSize > inLen)
-        return null;
+    if(pos+gramSize > inLen || gramSize > maxGram){
+      increment=1;
+      pos++;
+      gramSize=minGram;
     }
-    String gram = inStr.substring(pos, pos+gramSize);
-    int oldPos = pos;
-    pos++;
-    return new Token(gram, oldPos, oldPos+gramSize);
+    if(pos+minGram > inLen){
+      return null;
+    }
+
+    Token tk = new Token(inStr.substring(pos, pos+gramSize), pos, pos+gramSize);
+    tk.setPositionIncrement(increment);
+
+    increment=0;
+    gramSize++;
+
+    return tk;
   }
 }