Index: contrib/benchmark/CHANGES.txt =================================================================== --- contrib/benchmark/CHANGES.txt (revision 830393) +++ contrib/benchmark/CHANGES.txt (working copy) @@ -4,6 +4,11 @@ $Id:$ +10/28/2009 + LUCENE-1994: Fix thread safety of EnwikiContentSource and DocMaker + when doc.reuse.fields is false. Also made docs.reuse.fields=true + thread safe. (Mark Miller, Shai Erera, Mike McCandless) + 8/4/2009 LUCENE-1770: Add EnwikiQueryMaker (Mark Miller) Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 830393) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -40,6 +40,8 @@ import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.store.Directory; +import org.apache.lucene.search.FieldCache.StringIndex; +import org.apache.lucene.search.FieldCache; import junit.framework.TestCase; @@ -268,6 +270,42 @@ ir.close(); } + // LUCENE-1994: test thread safety of SortableSingleDocMaker + public void testDocMakerThreadSafety() throws Exception { + // 1. alg definition (required in every "logic" test) + String algLines[] = { + "# ----- properties ", + "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource", + "doc.term.vector=false", + "log.step.AddDoc=10000", + "content.source.forever=true", + "directory=RAMDirectory", + "doc.reuse.fields=false", + "doc.stored=false", + "doc.tokenized=false", + "doc.index.props=true", + "# ----- alg ", + "CreateIndex", + "[ { AddDoc > : 2500 ] : 4", + "CloseIndex", + }; + + // 2. we test this value later + CountingSearchTestTask.numSearches = 0; + + // 3. execute the algorithm (required in every "logic" test) + Benchmark benchmark = execBenchmark(algLines); + + IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true); + StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country"); + final int maxDoc = r.maxDoc(); + assertEquals(10000, maxDoc); + for(int i=0;i<10000;i++) { + assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]); + } + r.close(); + } + /** * Test Parallel Doc Maker logic (for LUCENE-940) */ Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (revision 830393) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (working copy) @@ -266,7 +266,7 @@ } } - public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { + public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { String[] tuple = parser.next(); docData.clear(); docData.setName(tuple[ID]); Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 830472) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy) @@ -145,7 +145,6 @@ protected ContentSource source; protected boolean reuseFields; - protected DocState localDocState; protected boolean indexProperties; private int lastPrintedNumUniqueTexts = 0; @@ -159,7 +158,7 @@ // reset the docdata properties so they are not added more than once. private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException { - final DocState ds = reuseFields ? getDocState() : localDocState; + final DocState ds = getDocState(); final Document doc = reuseFields ? ds.doc : new Document(); doc.getFields().clear(); @@ -286,7 +285,7 @@ */ public Document makeDocument() throws Exception { resetLeftovers(); - DocData docData = source.getNextDocData(reuseFields ? getDocState().docData : localDocState.docData); + DocData docData = source.getNextDocData(getDocState().docData); Document doc = createDocument(docData, 0, -1); return doc; } @@ -301,7 +300,7 @@ || lvr.docdata.getBody().length() == 0) { resetLeftovers(); } - DocData docData = reuseFields ? getDocState().docData : localDocState.docData; + DocData docData = getDocState().docData; DocData dd = (lvr == null ? source.getNextDocData(docData) : lvr.docdata); int cnt = (lvr == null ? 0 : lvr.cnt); while (dd.getBody() == null || dd.getBody().length() < size) { @@ -404,14 +403,11 @@ storeBytes = config.get("doc.store.body.bytes", false); reuseFields = config.get("doc.reuse.fields", true); - if (!reuseFields) { - localDocState = new DocState(false, storeVal, indexVal, bodyIndexVal, termVecVal); - } else { - // In a multi-rounds run, it is important to reset DocState since settings - // of fields may change between rounds, and this is the only way to reset - // the cache of all threads. - docState = new ThreadLocal(); - } + + // In a multi-rounds run, it is important to reset DocState since settings + // of fields may change between rounds, and this is the only way to reset + // the cache of all threads. + docState = new ThreadLocal(); indexProperties = config.get("doc.index.props", false);