Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleBoundaryScannerTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleBoundaryScannerTest.java (revision 9fcd35df2ce3f9bf1c57f8bba78173d0ca54a152) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleBoundaryScannerTest.java (revision ) @@ -36,6 +36,10 @@ start = TEXT.indexOf("formance"); int expected = TEXT.indexOf("high-performance"); assertEquals(expected, scanner.findStartOffset(text, start)); + + start = TEXT.indexOf("che"); + expected = TEXT.indexOf("Apache"); + assertEquals(expected, scanner.findStartOffset(text, start)); } public void testFindEndOffset() throws Exception { Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleBoundaryScanner.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleBoundaryScanner.java (revision 9fcd35df2ce3f9bf1c57f8bba78173d0ca54a152) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleBoundaryScanner.java (revision ) @@ -61,6 +61,10 @@ if( boundaryChars.contains( buffer.charAt( offset - 1 ) ) ) return offset; offset--; } + // if we scanned up to the start of the text, return it, it's a "boundary" + if (offset == 0) { + return 0; + } // not found return start; }