Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java
===================================================================
--- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java (revision 0)
+++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java (working copy)
@@ -0,0 +1,180 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.ParseException;
+import java.util.Properties;
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Ignore;
+import org.junit.Test;
+
+public class EnwikiContentSourceTest extends LuceneTestCase {
+
+ /** An EnwikiContentSource which works on a String and not files. */
+ private static class StringableEnwikiSource extends EnwikiContentSource {
+
+ private String docs = null;
+
+ public StringableEnwikiSource(String docs) {
+ this.docs = docs;
+ }
+
+ @SuppressWarnings("deprecation") // fine for the characters used in this test
+ @Override
+ protected InputStream openInputStream() throws IOException {
+ return new java.io.StringBufferInputStream(docs);
+ }
+
+ }
+
+ private void assertDocData(DocData dd, String expName, String expTitle, String expBody, String expDate)
+ throws ParseException {
+ assertNotNull(dd);
+ assertEquals(expName, dd.getName());
+ assertEquals(expTitle, dd.getTitle());
+ assertEquals(expBody, dd.getBody());
+ assertEquals(expDate, dd.getDate());
+ }
+
+ private void assertNoMoreDataException(EnwikiContentSource stdm) throws Exception {
+ boolean thrown = false;
+ try {
+ stdm.getNextDocData(null);
+ } catch (NoMoreDataException e) {
+ thrown = true;
+ }
+ assertTrue("Expecting NoMoreDataException", thrown);
+ }
+
+ private final String PAGE1 =
+ " \r\n" +
+ " Title1\r\n" +
+ " 0\r\n" +
+ " 1\r\n" +
+ " \r\n" +
+ " 11\r\n" +
+ " 111\r\n" +
+ " 2011-09-14T11:35:09Z\r\n" +
+ " \r\n" +
+ " Mister1111\r\n" +
+ " 1111\r\n" +
+ " \r\n" +
+ " \r\n" +
+ " /* Never mind */\r\n" +
+ " Some text 1 here\r\n" +
+ " \r\n" +
+ " \r\n";
+
+ private final String PAGE2 =
+ " \r\n" +
+ " Title2\r\n" +
+ " 0\r\n" +
+ " 2\r\n" +
+ " \r\n" +
+ " 22\r\n" +
+ " 222\r\n" +
+ " 2022-09-14T22:35:09Z\r\n" +
+ " \r\n" +
+ " Mister2222\r\n" +
+ " 2222\r\n" +
+ " \r\n" +
+ " \r\n" +
+ " /* Never mind */\r\n" +
+ " Some text 2 here\r\n" +
+ " \r\n" +
+ " \r\n";
+
+ @Test
+ public void testOneDocument() throws Exception {
+ String docs =
+ "\r\n" +
+ PAGE1 +
+ "";
+
+ EnwikiContentSource source = createContentSource(docs, false);
+
+ DocData dd = source.getNextDocData(new DocData());
+ assertDocData(dd, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
+
+ assertNoMoreDataException(source);
+ }
+
+ private EnwikiContentSource createContentSource(String docs, boolean forever) throws IOException {
+
+ Properties props = new Properties();
+ props.setProperty("print.props", "false");
+ props.setProperty("content.source.forever", Boolean.toString(forever));
+ Config config = new Config(props);
+
+ EnwikiContentSource source = new StringableEnwikiSource(docs);
+ source.setConfig(config);
+
+ // doc-maker just for initiating content source inputs
+ DocMaker docMaker = new DocMaker();
+ docMaker.setConfig(config, source);
+ docMaker.resetInputs();
+ return source;
+ }
+
+ @Test
+ public void testTwoDocuments() throws Exception {
+ String docs =
+ "\r\n" +
+ PAGE1 +
+ PAGE2 +
+ "";
+
+ EnwikiContentSource source = createContentSource(docs, false);
+
+ DocData dd1 = source.getNextDocData(new DocData());
+ assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
+
+ DocData dd2 = source.getNextDocData(new DocData());
+ assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
+
+ assertNoMoreDataException(source);
+ }
+
+ @Test
+ public void testForever() throws Exception {
+ String docs =
+ "\r\n" +
+ PAGE1 +
+ PAGE2 +
+ "";
+
+ EnwikiContentSource source = createContentSource(docs, true);
+
+ // same documents several times
+ for (int i=0; i<3; i++) {
+ DocData dd1 = source.getNextDocData(new DocData());
+ assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
+
+ DocData dd2 = source.getNextDocData(new DocData());
+ assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
+ // Don't test that NoMoreDataException is thrown, since the forever flag is turned on.
+ }
+
+ source.close();
+ }
+
+}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (revision 1417647)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (working copy)
@@ -53,6 +53,7 @@
private class Parser extends DefaultHandler implements Runnable {
private Thread t;
private boolean threadDone;
+ private boolean stopped = false;
private String[] tuple;
private NoMoreDataException nmde;
private StringBuilder contents = new StringBuilder();
@@ -70,31 +71,31 @@
}
String[] result;
synchronized(this){
- while(tuple == null && nmde == null && !threadDone) {
+ while(tuple == null && nmde == null && !threadDone && !stopped) {
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}
+ if (tuple != null) {
+ result = tuple;
+ tuple = null;
+ notify();
+ return result;
+ }
if (nmde != null) {
// Set to null so we will re-start thread in case
// we are re-used:
t = null;
throw nmde;
}
- if (t != null && threadDone) {
- // The thread has exited yet did not hit end of
- // data, so this means it hit an exception. We
- // throw NoMorDataException here to force
- // benchmark to stop the current alg:
- throw new NoMoreDataException();
- }
- result = tuple;
- tuple = null;
- notify();
+ // The thread has exited yet did not hit end of
+ // data, so this means it hit an exception. We
+ // throw NoMorDataException here to force
+ // benchmark to stop the current alg:
+ throw new NoMoreDataException();
}
- return result;
}
String time(String original) {
@@ -132,7 +133,7 @@
tmpTuple[BODY] = body.replaceAll("[\t\n]", " ");
tmpTuple[ID] = id;
synchronized(this) {
- while (tuple != null) {
+ while (tuple != null && !stopped) {
try {
wait();
} catch (InterruptedException ie) {
@@ -175,7 +176,7 @@
XMLReader reader = XMLReaderFactory.createXMLReader();
reader.setContentHandler(this);
reader.setErrorHandler(this);
- while(true){
+ while(!stopped){
final InputStream localFileIS = is;
try {
// To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
@@ -186,8 +187,7 @@
} catch (IOException ioe) {
synchronized(EnwikiContentSource.this) {
if (localFileIS != is) {
- // fileIS was closed on us, so, just fall
- // through
+ // fileIS was closed on us, so, just fall through
} else
// Exception is real
throw ioe;
@@ -200,7 +200,7 @@
return;
} else if (localFileIS == is) {
// If file is not already re-opened then re-open it now
- is = StreamUtils.inputStream(file);
+ is = openInputStream();
}
}
}
@@ -238,6 +238,17 @@
// this element should be discarded.
}
}
+
+ private void stop() {
+ synchronized (this) {
+ stopped = true;
+ if (tuple != null) {
+ tuple = null;
+ notify();
+ }
+ }
+ }
+
}
private static final Map ELEMENTS = new HashMap();
@@ -284,6 +295,7 @@
is.close();
is = null;
}
+ parser.stop();
}
}
@@ -301,18 +313,22 @@
@Override
public void resetInputs() throws IOException {
super.resetInputs();
- is = StreamUtils.inputStream(file);
+ is = openInputStream();
}
+
+ /** Open the input stream. */
+ protected InputStream openInputStream() throws IOException {
+ return StreamUtils.inputStream(file);
+ }
@Override
public void setConfig(Config config) {
super.setConfig(config);
keepImages = config.get("keep.image.only.docs", true);
String fileName = config.get("docs.file", null);
- if (fileName == null) {
- throw new IllegalArgumentException("docs.file must be set");
+ if (fileName != null) {
+ file = new File(fileName).getAbsoluteFile();
}
- file = new File(fileName).getAbsoluteFile();
}
}
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1417647)
+++ lucene/CHANGES.txt (working copy)
@@ -202,6 +202,10 @@
* LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris
Male, Mike McCandless)
+* LUCENE-4588: Fixed Benchmark's EnwikiContentSource to not
+ silently swallow last wiki document and to not leak threads
+ in 'forever' mode. (Doron Cohen)
+
Changes in Runtime Behavior
* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.