Index: dev-tools/eclipse/dot.classpath
===================================================================
--- dev-tools/eclipse/dot.classpath (revision 1361666)
+++ dev-tools/eclipse/dot.classpath (working copy)
@@ -102,6 +102,7 @@
+
Index: lucene/benchmark/build.xml
===================================================================
--- lucene/benchmark/build.xml (revision 1361666)
+++ lucene/benchmark/build.xml (working copy)
@@ -155,6 +155,7 @@
+
@@ -261,20 +262,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
Index: lucene/benchmark/ivy.xml
===================================================================
--- lucene/benchmark/ivy.xml (revision 1361666)
+++ lucene/benchmark/ivy.xml (working copy)
@@ -21,6 +21,7 @@
+
Index: lucene/benchmark/lib/nekohtml-1.9.15.jar.sha1
===================================================================
--- lucene/benchmark/lib/nekohtml-1.9.15.jar.sha1 (revision 0)
+++ lucene/benchmark/lib/nekohtml-1.9.15.jar.sha1 (working copy)
@@ -0,0 +1 @@
+a45cd7b7401d9c2264d4908182380452c03ebf8f
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (revision 1361666)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (working copy)
@@ -1,722 +0,0 @@
-/* Generated By:JavaCC: Do not edit this line. HTMLParser.java */
-package org.apache.lucene.benchmark.byTask.feeds.demohtml;
-
-import java.io.*;
-import java.util.Locale;
-import java.util.Properties;
-
-/**
- * Basic html parser (for demo/testing purposes only!)
- */
-public class HTMLParser implements HTMLParserConstants {
- public static int SUMMARY_LENGTH = 200;
-
- StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
- StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
- Properties metaTags=new Properties();
- String currentMetaTag=null;
- String currentMetaContent=null;
- int length = 0;
- boolean titleComplete = false;
- boolean inTitle = false;
- boolean inMetaTag = false;
- boolean inStyle = false;
- boolean afterTag = false;
- boolean afterSpace = false;
- String eol = System.getProperty("line.separator");
- Reader pipeIn = null;
- Writer pipeOut;
- private MyPipedInputStream pipeInStream = null;
- private PipedOutputStream pipeOutStream = null;
-
- public HTMLParser(Reader reader) {
- this(new FastCharStream(reader));
- }
-
- private class MyPipedInputStream extends PipedInputStream{
-
- public MyPipedInputStream(){
- super();
- }
-
- public MyPipedInputStream(PipedOutputStream src) throws IOException{
- super(src);
- }
-
- public boolean full() throws IOException{
- return this.available() >= PipedInputStream.PIPE_SIZE;
- }
- }
-
- public String getTitle() throws IOException, InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (titleComplete || pipeInStream.full())
- break;
- wait(10);
- }
- }
- return title.toString().trim();
- }
-
- public Properties getMetaTags() throws IOException,
-InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (titleComplete || pipeInStream.full())
- break;
- wait(10);
- }
- }
- return metaTags;
- }
-
-
- public String getSummary() throws IOException, InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
- break;
- wait(10);
- }
- }
- if (summary.length() > SUMMARY_LENGTH)
- summary.setLength(SUMMARY_LENGTH);
-
- String sum = summary.toString().trim();
- String tit = getTitle();
- if (sum.equals(""))
- return tit;
- else
- return sum;
- }
-
- public Reader getReader() throws IOException {
- if (pipeIn == null) {
- pipeInStream = new MyPipedInputStream();
- pipeOutStream = new PipedOutputStream(pipeInStream);
- pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE");
- pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE");
-
- Thread thread = new ParserThread(this);
- thread.start(); // start parsing
- }
-
- return pipeIn;
- }
-
- void addToSummary(String text) {
- if (summary.length() < SUMMARY_LENGTH) {
- summary.append(text);
- if (summary.length() >= SUMMARY_LENGTH) {
- synchronized(this) {
- notifyAll();
- }
- }
- }
- }
-
- void addText(String text) throws IOException {
- if (inStyle)
- return;
- if (inTitle)
- title.append(text);
- else {
- addToSummary(text);
- if (!titleComplete && !(title.length() == 0)) { // finished title
- synchronized(this) {
- titleComplete = true; // tell waiting threads
- notifyAll();
- }
- }
- }
-
- length += text.length();
- pipeOut.write(text);
-
- afterSpace = false;
- }
-
- void addMetaTag() {
- metaTags.setProperty(currentMetaTag, currentMetaContent);
- currentMetaTag = null;
- currentMetaContent = null;
- return;
- }
-
- void addSpace() throws IOException {
- if (!afterSpace) {
- if (inTitle)
- title.append(" ");
- else
- addToSummary(" ");
-
- String space = afterTag ? eol : " ";
- length += space.length();
- pipeOut.write(space);
- afterSpace = true;
- }
- }
-
- final public void HTMLDocument() throws ParseException, IOException {
- Token t;
- label_1:
- while (true) {
- switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case ScriptStart:
- case TagName:
- case DeclName:
- case Comment1:
- case Comment2:
- case Word:
- case Entity:
- case Space:
- case Punct:
- ;
- break;
- default:
- jj_la1[0] = jj_gen;
- break label_1;
- }
- switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case TagName:
- Tag();
- afterTag = true;
- break;
- case DeclName:
- t = Decl();
- afterTag = true;
- break;
- case Comment1:
- case Comment2:
- CommentTag();
- afterTag = true;
- break;
- case ScriptStart:
- ScriptTag();
- afterTag = true;
- break;
- case Word:
- t = jj_consume_token(Word);
- addText(t.image); afterTag = false;
- break;
- case Entity:
- t = jj_consume_token(Entity);
- addText(Entities.decode(t.image)); afterTag = false;
- break;
- case Punct:
- t = jj_consume_token(Punct);
- addText(t.image); afterTag = false;
- break;
- case Space:
- jj_consume_token(Space);
- addSpace(); afterTag = false;
- break;
- default:
- jj_la1[1] = jj_gen;
- jj_consume_token(-1);
- throw new ParseException();
- }
- }
- jj_consume_token(0);
- }
-
- final public void Tag() throws ParseException, IOException {
- Token t1, t2;
- boolean inImg = false;
- t1 = jj_consume_token(TagName);
- String tagName = t1.image.toLowerCase(Locale.ROOT);
- if(Tags.WS_ELEMS.contains(tagName) ) {
- addSpace();
- }
- inTitle = tagName.equalsIgnoreCase("
- inMetaTag = tagName.equalsIgnoreCase("
- inStyle = tagName.equalsIgnoreCase("" +
- "foo