Index: BUILD.txt =================================================================== RCS file: /home/cvspublic/jakarta-lucene/BUILD.txt,v retrieving revision 1.6 diff -u -r1.6 BUILD.txt --- BUILD.txt 11 Jun 2003 17:43:47 -0000 1.6 +++ BUILD.txt 14 Jun 2003 02:25:59 -0000 @@ -86,19 +86,15 @@ http://www.experimentalstuff.com/Technologies/JavaCC/ Follow the download links and download the zip file to a temporary -location on your file system. Unzip the file and run the large class file -in the directory. On windows, use this command from the temp directory: +location on your file system. Unzip the file to the location +of your choice. (Note that JavaCC 3.0 no longer uses an installer) - java -cp . JavaCC2_1 - -This will launch a Java GUI installer. There is also a command line -installer available, and the installation class will give you those -directions. After JavaCC is installed, edit your build properties +After JavaCC is installed, edit your build properties (as in step 2), and add the line - javacc.home=/javacc/bin + javacc.home=/javacc -where this points to the bin directory of your javacc installation. +where this points to the directory of your javacc installation. Step 4) Run ant Index: build.xml =================================================================== RCS file: /home/cvspublic/jakarta-lucene/build.xml,v retrieving revision 1.36 diff -u -r1.36 build.xml --- build.xml 11 Jun 2003 17:43:47 -0000 1.36 +++ build.xml 14 Jun 2003 02:26:00 -0000 @@ -52,10 +52,23 @@ + + + + + + + @@ -99,25 +115,10 @@ - - - - - - - - + @@ -202,22 +203,7 @@ - - - - - - - - + @@ -228,7 +214,7 @@ - + @@ -239,11 +225,6 @@ - --> + + + + + + + + + + + + + + + + + + + + + + + + + + Index: default.properties =================================================================== RCS file: /home/cvspublic/jakarta-lucene/default.properties,v retrieving revision 1.14 diff -u -r1.14 default.properties --- default.properties 20 Mar 2003 18:15:04 -0000 1.14 +++ default.properties 14 Jun 2003 02:26:00 -0000 @@ -58,8 +58,8 @@ # Home directory of JavaCC javacc.home = . -javacc.zip.dir = ${javacc.home}/lib -javacc.zip = ${javacc.zip.dir}/JavaCC.zip +javacc.lib.dir = ${javacc.home}/bin/lib +javacc.jar = ${javacc.lib.dir}/javacc.jar # Home directory of jakarta-site2 jakarta.site2.home = ../jakarta-site2 Index: lib/javacc3-ant-support.jar =================================================================== RCS file: lib/javacc3-ant-support.jar diff -N lib/javacc3-ant-support.jar Binary files /dev/null and javacc3-ant-support.jar differ Index: src/demo/org/apache/lucene/demo/html/HTMLParser.java =================================================================== RCS file: src/demo/org/apache/lucene/demo/html/HTMLParser.java diff -N src/demo/org/apache/lucene/demo/html/HTMLParser.java --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/demo/org/apache/lucene/demo/html/HTMLParser.java 14 Jun 2003 02:26:01 -0000 @@ -0,0 +1,688 @@ +/* Generated By:JavaCC: Do not edit this line. HTMLParser.java */ +package org.apache.lucene.demo.html; + +import java.io.*; +import java.util.Properties; + +public class HTMLParser implements HTMLParserConstants { + public static int SUMMARY_LENGTH = 200; + + StringBuffer title = new StringBuffer(SUMMARY_LENGTH); + StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2); + Properties metaTags=new Properties(); + String currentMetaTag=""; + int length = 0; + boolean titleComplete = false; + boolean inTitle = false; + boolean inMetaTag = false; + boolean inStyle = false; + boolean inScript = false; + boolean afterTag = false; + boolean afterSpace = false; + String eol = System.getProperty("line.separator"); + PipedReader pipeIn = null; + PipedWriter pipeOut; + + public HTMLParser(File file) throws FileNotFoundException { + this(new FileInputStream(file)); + } + + public String getTitle() throws IOException, InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (titleComplete || (length > SUMMARY_LENGTH)) + break; + wait(10); + } + } + return title.toString().trim(); + } + + public Properties getMetaTags() throws IOException, +InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (titleComplete || (length > SUMMARY_LENGTH)) + break; + wait(10); + } + } + return metaTags; + } + + + public String getSummary() throws IOException, InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (summary.length() >= SUMMARY_LENGTH) + break; + wait(10); + } + } + if (summary.length() > SUMMARY_LENGTH) + summary.setLength(SUMMARY_LENGTH); + + String sum = summary.toString().trim(); + String tit = getTitle(); + if (sum.startsWith(tit)) + return sum.substring(tit.length()); + else + return sum; + } + + public Reader getReader() throws IOException { + if (pipeIn == null) { + pipeIn = new PipedReader(); + pipeOut = new PipedWriter(pipeIn); + + Thread thread = new ParserThread(this); + thread.start(); // start parsing + } + + return pipeIn; + } + + void addToSummary(String text) { + if (summary.length() < SUMMARY_LENGTH) { + summary.append(text); + if (summary.length() >= SUMMARY_LENGTH) { + synchronized(this) { + notifyAll(); + } + } + } + } + + void addText(String text) throws IOException { + if (inScript) + return; + if (inStyle) + return; + if (inMetaTag) + { + metaTags.setProperty(currentMetaTag, text); + return; + } + if (inTitle) + title.append(text); + else { + addToSummary(text); + if (!titleComplete && !title.equals("")) { // finished title + synchronized(this) { + titleComplete = true; // tell waiting threads + notifyAll(); + } + } + } + + length += text.length(); + pipeOut.write(text); + + afterSpace = false; + } + + void addSpace() throws IOException { + if (inScript) + return; + if (!afterSpace) { + if (inTitle) + title.append(" "); + else + addToSummary(" "); + + String space = afterTag ? eol : " "; + length += space.length(); + pipeOut.write(space); + afterSpace = true; + } + } + + final public void HTMLDocument() throws ParseException, IOException { + Token t; + label_1: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case TagName: + case DeclName: + case Comment1: + case Comment2: + case Word: + case Entity: + case Space: + case Punct: + ; + break; + default: + jj_la1[0] = jj_gen; + break label_1; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case TagName: + Tag(); + afterTag = true; + break; + case DeclName: + t = Decl(); + afterTag = true; + break; + case Comment1: + case Comment2: + CommentTag(); + afterTag = true; + break; + case Word: + t = jj_consume_token(Word); + addText(t.image); afterTag = false; + break; + case Entity: + t = jj_consume_token(Entity); + addText(Entities.decode(t.image)); afterTag = false; + break; + case Punct: + t = jj_consume_token(Punct); + addText(t.image); afterTag = false; + break; + case Space: + jj_consume_token(Space); + addSpace(); afterTag = false; + break; + default: + jj_la1[1] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + jj_consume_token(0); + } + + final public void Tag() throws ParseException, IOException { + Token t1, t2; + boolean inImg = false; + t1 = jj_consume_token(TagName); + inTitle = t1.image.equalsIgnoreCase(" + inMetaTag = t1.image.equalsIgnoreCase(" + inStyle = t1.image.equalsIgnoreCase(" + inImg = t1.image.equalsIgnoreCase(" + if (inScript) { // keep track if in