Index: lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java
===================================================================
--- lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java (revision 1031463)
+++ lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java (working copy)
@@ -105,6 +105,13 @@
assertEquals(200, parser.getSummary().length());
}
+ // LUCENE-590
+ public void testSummaryTitle() throws Exception {
+ String text = "<html><head><title>Summary</title></head><body>Summary of the document</body></html>";
+ HTMLParser parser = new HTMLParser(new StringReader(text));
+ assertEquals("Summary of the document", parser.getSummary());
+ }
+
// LUCENE-2246
public void testTurkish() throws Exception {
String text = "" +
Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java
===================================================================
--- lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java (revision 1031463)
+++ lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java (working copy)
@@ -84,7 +84,7 @@
String sum = summary.toString().trim();
String tit = getTitle();
- if (sum.startsWith(tit) || sum.equals(""))
+ if (sum.equals(""))
return tit;
else
return sum;
Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj
===================================================================
--- lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj (revision 1031463)
+++ lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj (working copy)
@@ -111,7 +111,7 @@
String sum = summary.toString().trim();
String tit = getTitle();
- if (sum.startsWith(tit) || sum.equals(""))
+ if (sum.equals(""))
return tit;
else
return sum;
Index: lucene/contrib/CHANGES.txt
===================================================================
--- lucene/contrib/CHANGES.txt (revision 1031463)
+++ lucene/contrib/CHANGES.txt (working copy)
@@ -140,6 +140,9 @@
* LUCENE-2246: Fix contrib/demo for Turkish html documents.
(Selim Nadi via Robert Muir)
+
+* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading.
+ (Curtis d'Entremont via Robert Muir)
API Changes