--- HTMLParser.jj.bak 2003-10-31 11:57:43.000000000 +0100 +++ HTMLParser.jj 2003-11-06 14:21:51.000000000 +0100 @@ -66,6 +66,7 @@ package org.apache.lucene.demo.html; import java.io.*; +import java.util.HashSet; import java.util.Properties; public class HTMLParser { @@ -86,9 +87,28 @@ String eol = System.getProperty("line.separator"); PipedReader pipeIn = null; PipedWriter pipeOut; + HashSet ws_elems = new HashSet(); // HTML elements that display whitespace public HTMLParser(File file) throws FileNotFoundException { this(new FileInputStream(file)); + ws_elems.add("" does not need to be listed explicitly + ws_elems.add(" { + String tagName = t1.image.toLowerCase(); + if( ws_elems.contains(tagName) ) { + addSpace(); + } inTitle = t1.image.equalsIgnoreCase(" inMetaTag = t1.image.equalsIgnoreCase(" inStyle = t1.image.equalsIgnoreCase("