Index: modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (revision 1065889)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (working copy)
@@ -261,4 +261,31 @@
     doTestOffsets("X < &zz >X &# < X > < &l > &g < X");
   }
 
+  /**
+   * Reads {@code in} through an HTMLStripCharFilter one char at a time and,
+   * for every output position, asserts that correctOffset() does not point
+   * past the end of the original input.
+   */
+  static void assertLegalOffsets(String in) throws Exception {
+    int length = in.length();
+    HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(in))));
+    try {
+      int off = 0;
+      while (reader.read() != -1) {
+        int correction = reader.correctOffset(off);
+        assertTrue("invalid offset correction: " + off + "->" + correction + " for doc of length: " + length,
+            correction <= length);
+        off++;
+      }
+    } finally {
+      // close the reader chain even when an assertion above fails
+      reader.close();
+    }
+  }
+
+  /** Checks offset correction on plain text and on text containing an unterminated {@code &#x} reference. */
+  public void testLegalOffsets() throws Exception {
+    assertLegalOffsets("hello world");
+    assertLegalOffsets("hello &#x world");
+  }
 }