Clamp NGramTokenFilter gram offsets to the end offset of the source token.

NGramTokenFilter now records the end offset reported by the incoming token
(tokEnd = offsetAtt.endOffset()) next to the start offset it already kept,
and passes both the start and the end offset of every emitted gram through
Math.min(..., tokEnd).

The new test, testCorrectOffsetWithMappingChar, feeds "abcde" through a
MappingCharFilter that maps "c" to "zzz" and expects the 1-grams
a, b, z, z, z, d, e with start offsets 0,1,2,3,4,5,5 and end offsets
1,2,3,4,5,5,5.

NOTE(review): the line structure below was restored from a copy whose
newlines had been collapsed; it matches every hunk header count, but the
leading whitespace was reconstructed assuming two-space indentation. If a
hunk fails to match, apply with whitespace-insensitive matching
(e.g. "patch -l").

Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java	(revision 1067853)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java	(working copy)
@@ -17,6 +17,8 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MappingCharFilter;
+import org.apache.lucene.analysis.NormalizeCharMap;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -92,4 +94,12 @@
     tokenizer.reset(new StringReader("abcde"));
     assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5});
   }
+
+  public void testCorrectOffsetWithMappingChar() throws Exception {
+    NormalizeCharMap map = new NormalizeCharMap();
+    map.add("c", "zzz");
+    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new MappingCharFilter(map, new StringReader("abcde")));
+    NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
+    assertTokenStreamContents(filter, new String[]{"a","b","z","z","z","d","e"}, new int[]{0,1,2,3,4,5,5}, new int[]{1,2,3,4,5,5,5});
+  }
 }
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java	(revision 1067853)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java	(working copy)
@@ -39,6 +39,7 @@
   private int curGramSize;
   private int curPos;
   private int tokStart;
+  private int tokEnd;
 
   private TermAttribute termAtt;
   private OffsetAttribute offsetAtt;
@@ -84,13 +85,14 @@
           curGramSize = minGram;
           curPos = 0;
           tokStart = offsetAtt.startOffset();
+          tokEnd = offsetAtt.endOffset();
         }
       }
       while (curGramSize <= maxGram) {
         while (curPos+curGramSize <= curTermLength) {     // while there is input
           clearAttributes();
           termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
-          offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
+          offsetAtt.setOffset(Math.min(tokStart + curPos, tokEnd), Math.min(tokStart + curPos + curGramSize, tokEnd));
           curPos++;
           return true;
         }