Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 550600) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -123,6 +123,34 @@ assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs()); } + /** + * Test Parallel Doc Maker logic (for LUCENE-940) + */ + public void testParallelDocMaker() throws Exception { + // 1. alg definition (required in every "logic" test) + String algLines[] = { + "# ----- properties ", + "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker", + "doc.add.log.step=2697", + "doc.term.vector=false", + "doc.maker.forever=false", + "directory=FSDirectory", + "doc.stored=false", + "doc.tokenized=false", + "# ----- alg ", + "CreateIndex", + "[ { AddDoc } : * ] : 4 ", + "CloseIndex", + }; + + // 2. execute the algorithm (required in every "logic" test) + Benchmark benchmark = execBenchmark(algLines); + + // 3. test number of docs in the index + IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory()); + int ndocsExpected = 21578; // that's how many docs there are in the Reuters collection. + assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); + } // create the benchmark and execute it. 
private Benchmark execBenchmark(String[] algLines) throws Exception { Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 550600) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (working copy) @@ -40,8 +40,9 @@ super(runData); } - private static int logStep = -1; + private int logStep = -1; private int docSize = 0; + int count = 0; // volatile data passed between setup(), doLogic(), tearDown(). private Document doc = null; @@ -64,8 +65,7 @@ * @see PerfTask#tearDown() */ public void tearDown() throws Exception { - DocMaker docMaker = getRunData().getDocMaker(); - log(docMaker.getCount()); + log(++count); doc = null; super.tearDown(); } @@ -77,11 +77,11 @@ private void log (int count) { if (logStep<0) { - // avoid sync although race possible here + // init once per instance logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP); } if (logStep>0 && (count%logStep)==0) { - System.out.println("--> processed (add) "+count+" docs"); + System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs"); } } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java (revision 550600) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java (working copy) @@ -43,8 +43,8 @@ super(runData); } - private static int logStep = -1; - private static int deleteStep = -1; + private int logStep = -1; + private int deleteStep = -1; private static int numDeleted = 0; private static int lastDeleted = -1; Index: 
contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (revision 550600) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (working copy) @@ -37,7 +37,10 @@ "right place at the right time with the right knowledge."; // return a new docid - private synchronized int newdocid() { + private synchronized int newdocid() throws NoMoreDataException { + if (docID>0 && !forever) { + throw new NoMoreDataException(); + } return docID++; } @@ -59,11 +62,9 @@ } protected DocData getNextDocData() throws NoMoreDataException { - if (docID>0 && !forever) { - throw new NoMoreDataException(); - } + int id = newdocid(); addBytes(DOC_TEXT.length()); - return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null); + return new DocData("doc"+id, DOC_TEXT, null, null, null); } } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (revision 550600) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (working copy) @@ -39,7 +39,7 @@ */ public class ReutersDocMaker extends BasicDocMaker { - private DateFormat dateFormat; + private ThreadLocal dateFormat = new ThreadLocal(); private File dataDir = null; private ArrayList inputFiles = new ArrayList(); private int nextFile = 0; @@ -58,11 +58,21 @@ if (inputFiles.size()==0) { throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); } - // date format: 30-MAR-1987 14:22:36.87 - dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US); - dateFormat.setLenient(true); } + // get/initiate a thread-local simple date format (must do so + 
// because SimpleDateFormat is not thread-safe). + protected synchronized DateFormat getDateFormat () { + DateFormat df = (DateFormat) dateFormat.get(); + if (df == null) { + // date format: 30-MAR-1987 14:22:36.87 + df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US); + df.setLenient(true); + dateFormat.set(df); + } + return df; + } + protected DocData getNextDocData() throws Exception { File f = null; String name = null; @@ -95,7 +105,7 @@ addBytes(f.length()); - Date date = dateFormat.parse(dateStr.trim()); + Date date = getDateFormat().parse(dateStr.trim()); return new DocData(name, bodyBuf.toString(), title, null, date); } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (revision 550600) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (working copy) @@ -41,7 +41,7 @@ private static final String newline = System.getProperty("line.separator"); - private DateFormat dateFormat []; + private ThreadLocal dateFormat = new ThreadLocal(); private File dataDir = null; private ArrayList inputFiles = new ArrayList(); private int nextFile = 0; @@ -67,12 +67,6 @@ if (inputFiles.size()==0) { throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); } - // date format: 30-MAR-1987 14:22:36.87 - dateFormat = new SimpleDateFormat[DATE_FORMATS.length]; - for (int i = 0; i < dateFormat.length; i++) { - dateFormat[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US); - dateFormat[i].setLenient(true); - } } private void openNextFile() throws NoMoreDataException, Exception { @@ -177,17 +171,30 @@ // this is the next document, so parse it Date date = parseDate(dateStr); HTMLParser p = getHtmlParser(); - DocData docData = p.parse(name, date, sb, dateFormat[0]); + DocData docData = p.parse(name, date, sb, 
getDateFormat(0)); addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text). return docData; } + private DateFormat getDateFormat(int n) { + DateFormat df[] = (DateFormat[]) dateFormat.get(); + if (df == null) { + df = new SimpleDateFormat[DATE_FORMATS.length]; + for (int i = 0; i < df.length; i++) { + df[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US); + df[i].setLenient(true); + } + dateFormat.set(df); + } + return df[n]; + } + private Date parseDate(String dateStr) { Date date = null; - for (int i=0; i