Index: lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java
===================================================================
--- lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java	(revision 1031463)
+++ lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java	(working copy)
@@ -105,6 +105,13 @@
     assertEquals(200, parser.getSummary().length());
   }
   
+  // LUCENE-590
+  public void testSummaryTitle() throws Exception {
+    String text = "<html><head><title>Summary</title></head><body>Summary of the document</body></html>";
+    HTMLParser parser = new HTMLParser(new StringReader(text));
+    assertEquals("Summary of the document", parser.getSummary());
+  }
+  
   // LUCENE-2246
   public void testTurkish() throws Exception {
     String text = "<html><body>" +
Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java
===================================================================
--- lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java	(revision 1031463)
+++ lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java	(working copy)
@@ -84,7 +84,7 @@
 
     String sum = summary.toString().trim();
     String tit = getTitle();
-    if (sum.startsWith(tit) || sum.equals(""))
+    if (sum.equals(""))
       return tit;
     else
       return sum;
Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj
===================================================================
--- lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj	(revision 1031463)
+++ lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj	(working copy)
@@ -111,7 +111,7 @@
 
     String sum = summary.toString().trim();
     String tit = getTitle();
-    if (sum.startsWith(tit) || sum.equals(""))
+    if (sum.equals(""))
       return tit;
     else
       return sum;
Index: lucene/contrib/CHANGES.txt
===================================================================
--- lucene/contrib/CHANGES.txt	(revision 1031463)
+++ lucene/contrib/CHANGES.txt	(working copy)
@@ -140,6 +140,9 @@
 
  * LUCENE-2246: Fix contrib/demo for Turkish html documents.
    (Selim Nadi via Robert Muir)
+
+* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading
+  (Curtis d'Entremont via Robert Muir)
  
 API Changes
 