Index: lucene/contrib/benchmark/CHANGES.txt =================================================================== --- lucene/contrib/benchmark/CHANGES.txt (revision 938459) +++ lucene/contrib/benchmark/CHANGES.txt (working copy) @@ -2,6 +2,8 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. +4/27/2010: WriteLineDocTask now supports multi-threading. (Shai Erera) + 4/07/2010 LUCENE-2377: Enable the use of NoMergePolicy and NoMergeScheduler by CreateIndexTask. (Shai Erera) Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java (working copy) @@ -17,7 +17,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; /** * Abstract base query maker. @@ -45,7 +44,7 @@ StringBuffer sb = new StringBuffer(); if (queries != null) { for (int i = 0; i < queries.length; i++) { - sb.append(i+". "+ Format.simpleName(queries[i].getClass())+" - "+queries[i].toString()); + sb.append(i+". "+ queries[i].getClass().getSimpleName()+" - "+queries[i].toString()); sb.append(newline); } } Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java (working copy) @@ -40,8 +40,6 @@ * denotes a relative path (default=work). *
  • docs.dir - specifies the directory the Dir collection. Can be set * to a relative path if "work.dir" is also specified (default=dir-out). - *
  • html.parser - specifies the {@link HTMLParser} class to use for - * parsing the TREC documents content (default=DemoHTMLParser). * */ public class DirContentSource extends ContentSource { Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy) @@ -333,7 +333,7 @@ String col = " "; StringBuffer sb = new StringBuffer(); String newline = System.getProperty("line.separator"); - sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline); + sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); int nut = source.getTotalDocsCount(); if (nut > lastPrintedNumUniqueTexts) { print = true; Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java (working copy) @@ -1,34 +1,56 @@ package org.apache.lucene.benchmark.byTask.feeds; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import org.apache.lucene.util.English; import java.io.IOException; import java.util.Date; - /** - * - * - **/ + * Creates documents whose content is a long number starting from + * {@link Long#MIN_VALUE} + 10. + */ public class LongToEnglishContentSource extends ContentSource{ private long counter = Long.MIN_VALUE + 10; @Override public void close() throws IOException { - } - //TODO: reduce/clean up synchonization + @Override public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { docData.clear(); - docData.setBody(English.longToEnglish(counter)); - docData.setName("doc_" + String.valueOf(counter)); - docData.setTitle("title_" + String.valueOf(counter)); + // store the current counter to avoid synchronization + long curCounter; + synchronized (this) { + curCounter = counter; + } + docData.setBody(English.longToEnglish(curCounter)); + docData.setName("doc_" + String.valueOf(curCounter)); + docData.setTitle("title_" + String.valueOf(curCounter)); docData.setDate(new Date()); - if (counter == Long.MAX_VALUE){ - counter = Long.MIN_VALUE + 10;//loop around + synchronized (this) { + if (counter == Long.MAX_VALUE){ + counter = Long.MIN_VALUE + 10;//loop around + } + counter++; } - counter++; return docData; } @@ -36,4 +58,5 @@ public void resetInputs() throws IOException { counter = Long.MIN_VALUE + 10; } + } Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (working copy) @@ -32,7 +32,7 @@ import java.util.zip.GZIPInputStream; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.StringBufferReader; +import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader; import org.apache.lucene.util.ThreadInterruptedException; /** @@ -53,7 +53,6 @@ * */ public class TrecContentSource extends ContentSource { - // TODO (3.0): change StringBuffer to StringBuilder private static final class DateFormatInfo { DateFormat[] dfs; @@ -79,8 +78,8 @@ }; private ThreadLocal dateFormats = new ThreadLocal(); - private ThreadLocal trecDocReader = new ThreadLocal(); - private ThreadLocal trecDocBuffer = new ThreadLocal(); + private ThreadLocal trecDocReader = new ThreadLocal(); + private ThreadLocal trecDocBuffer = new ThreadLocal(); private File dataDir = null; private ArrayList inputFiles = new ArrayList(); private int nextFile = 0; @@ -110,19 +109,19 @@ return dfi; } - private StringBuffer getDocBuffer() { - StringBuffer sb = trecDocBuffer.get(); + private StringBuilder getDocBuffer() { + StringBuilder sb = trecDocBuffer.get(); if (sb == null) { - sb = new StringBuffer(); + sb = new StringBuilder(); trecDocBuffer.set(sb); } return sb; } - private Reader getTrecDocReader(StringBuffer docBuffer) { - StringBufferReader r = trecDocReader.get(); + private Reader getTrecDocReader(StringBuilder docBuffer) { + StringBuilderReader r = trecDocReader.get(); if (r == null) { - r = new StringBufferReader(docBuffer); + r = new StringBuilderReader(docBuffer); trecDocReader.set(r); } else { r.set(docBuffer); @@ -131,7 +130,7 @@ } // read until finding a line that starts with the specified prefix, or a terminating tag has been found. - private void read(StringBuffer buf, String prefix, boolean collectMatchLine, + private void read(StringBuilder buf, String prefix, boolean collectMatchLine, boolean collectAll, String terminatingTag) throws IOException, NoMoreDataException { String sep = ""; @@ -248,7 +247,7 @@ openNextFile(); } - StringBuffer docBuf = getDocBuffer(); + StringBuilder docBuf = getDocBuffer(); // 1. skip until doc start docBuf.setLength(0); Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (working copy) @@ -23,7 +23,6 @@ import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.stats.TaskStats; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; /** * An abstract task to be tested for performance.
    @@ -67,7 +66,7 @@ /** Should not be used externally */ private PerfTask() { - name = Format.simpleName(getClass()); + name = getClass().getSimpleName(); if (name.endsWith("Task")) { name = name.substring(0, name.length() - 4); } @@ -99,13 +98,7 @@ this.maxDepthLogStart = config.get("task.max.depth.log",0); String logStepAtt = "log.step"; - // TODO (1.5): call getClass().getSimpleName() instead. - String taskName = getClass().getName(); - int idx = taskName.lastIndexOf('.'); - // To support test internal classes. when we move to getSimpleName, this can be removed. - int idx2 = taskName.indexOf('$', idx); - if (idx2 != -1) idx = idx2; - String taskLogStepAtt = "log.step." + taskName.substring(idx + 1, taskName.length() - 4 /* w/o the 'Task' part */); + String taskLogStepAtt = "log.step." + name; if (config.get(taskLogStepAtt, null) != null) { logStepAtt = taskLogStepAtt; } Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java (working copy) @@ -22,6 +22,7 @@ import java.io.FileOutputStream; import java.io.OutputStream; import java.io.OutputStreamWriter; +import java.io.PrintWriter; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -56,8 +57,9 @@ private static final Matcher NORMALIZER = Pattern.compile("[\t\r\n]+").matcher(""); private int docSize = 0; - private BufferedWriter lineFileOut = null; + private PrintWriter lineFileOut = null; private DocMaker docMaker; + private ThreadLocal threadBuffer = new ThreadLocal(); public WriteLineDocTask(PerfRunData runData) throws Exception { super(runData); @@ -85,7 +87,7 @@ out = new BufferedOutputStream(out, 1 << 16); out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out); } - lineFileOut = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16); + lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16)); docMaker = runData.getDocMaker(); } @@ -109,12 +111,15 @@ f = doc.getField(DocMaker.DATE_FIELD); String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : ""; - lineFileOut.write(title, 0, title.length()); - lineFileOut.write(SEP); - lineFileOut.write(date, 0, date.length()); - lineFileOut.write(SEP); - lineFileOut.write(body, 0, body.length()); - lineFileOut.newLine(); + StringBuilder sb = threadBuffer.get(); + if (sb == null) { + sb = new StringBuilder(); + threadBuffer.set(sb); + } + sb.setLength(0); + sb.append(title).append(SEP).append(date).append(SEP).append(body); + // lineFileOut is a PrintWriter, which synchronizes internally in println. + lineFileOut.println(sb.toString()); } return 1; } Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (working copy) @@ -107,19 +107,4 @@ return res.substring(res.length() - col.length()); } - /** - * Extract simple class name - * @param cls class whose simple name is required - * @return simple class name - */ - public static String simpleName (Class cls) { - String c = cls.getName(); - String p = cls.getPackage().getName(); - int k = c.lastIndexOf(p+"."); - if (k<0) { - return c; - } - return c.substring(k+1+p.length()); - } - } Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java (working copy) @@ -1,181 +0,0 @@ -package org.apache.lucene.benchmark.byTask.utils; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.Reader; - -/** - * Implements a {@link Reader} over a {@link StringBuffer} instance. Although - * one can use {@link java.io.StringReader} by passing it - * {@link StringBuffer#toString()}, it is better to use this class, as it - * doesn't mark the passed-in {@link StringBuffer} as shared (which will cause - * inner char[] allocations at the next append() attempt).
    - * Notes: - *
      - *
    • This implementation assumes the underlying {@link StringBuffer} is not - * changed during the use of this {@link Reader} implementation. - *
    • This implementation is thread-safe. - *
    • The implementation looks very much like {@link java.io.StringReader} (for - * the right reasons). - *
    • If one wants to reuse that instance, then the following needs to be done: - *
      - * StringBuffer sb = new StringBuffer("some text");
      - * Reader reader = new StringBufferReader(sb);
      - * ... read from reader - don't close it ! ...
      - * sb.setLength(0);
      - * sb.append("some new text");
      - * reader.reset();
      - * ... read the new string from the reader ...
      - * 
      - *
    - */ -public class StringBufferReader extends Reader { - - // TODO (3.0): change to StringBuffer (including the name of the class) - - // The StringBuffer to read from. - private StringBuffer sb; - - // The length of 'sb'. - private int length; - - // The next position to read from the StringBuffer. - private int next = 0; - - // The mark position. The default value 0 means the start of the text. - private int mark = 0; - - public StringBufferReader(StringBuffer sb) { - set(sb); - } - - /** Check to make sure that the stream has not been closed. */ - private void ensureOpen() throws IOException { - if (sb == null) { - throw new IOException("Stream has already been closed"); - } - } - - @Override - public void close() { - synchronized (lock) { - sb = null; - } - } - - /** - * Mark the present position in the stream. Subsequent calls to reset() will - * reposition the stream to this point. - * - * @param readAheadLimit Limit on the number of characters that may be read - * while still preserving the mark. Because the stream's input comes - * from a StringBuffer, there is no actual limit, so this argument - * must not be negative, but is otherwise ignored. - * @exception IllegalArgumentException If readAheadLimit is < 0 - * @exception IOException If an I/O error occurs - */ - @Override - public void mark(int readAheadLimit) throws IOException { - if (readAheadLimit < 0){ - throw new IllegalArgumentException("Read-ahead limit cannpt be negative: " + readAheadLimit); - } - synchronized (lock) { - ensureOpen(); - mark = next; - } - } - - @Override - public boolean markSupported() { - return true; - } - - @Override - public int read() throws IOException { - synchronized (lock) { - ensureOpen(); - return next >= length ? -1 : sb.charAt(next++); - } - } - - @Override - public int read(char cbuf[], int off, int len) throws IOException { - synchronized (lock) { - ensureOpen(); - - // Validate parameters - if (off < 0 || off > cbuf.length || len < 0 || off + len > cbuf.length) { - throw new IndexOutOfBoundsException("off=" + off + " len=" + len + " cbuf.length=" + cbuf.length); - } - - if (len == 0) { - return 0; - } - - if (next >= length) { - return -1; - } - - int n = Math.min(length - next, len); - sb.getChars(next, next + n, cbuf, off); - next += n; - return n; - } - } - - @Override - public boolean ready() throws IOException { - synchronized (lock) { - ensureOpen(); - return true; - } - } - - @Override - public void reset() throws IOException { - synchronized (lock) { - ensureOpen(); - next = mark; - length = sb.length(); - } - } - - public void set(StringBuffer sb) { - synchronized (lock) { - this.sb = sb; - length = sb.length(); - } - } - @Override - public long skip(long ns) throws IOException { - synchronized (lock) { - ensureOpen(); - if (next >= length) { - return 0; - } - - // Bound skip by beginning and end of the source - long n = Math.min(length - next, ns); - n = Math.max(-next, n); - next += n; - return n; - } - } - -} Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java (revision 938459) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java (working copy) @@ -21,22 +21,22 @@ import java.io.Reader; /** - * Implements a {@link Reader} over a {@link StringBuffer} instance. Although + * Implements a {@link Reader} over a {@link StringBuilder} instance. Although * one can use {@link java.io.StringReader} by passing it - * {@link StringBuffer#toString()}, it is better to use this class, as it - * doesn't mark the passed-in {@link StringBuffer} as shared (which will cause + * {@link StringBuilder#toString()}, it is better to use this class, as it + * doesn't mark the passed-in {@link StringBuilder} as shared (which will cause * inner char[] allocations at the next append() attempt).
    * Notes: *
      - *
    • This implementation assumes the underlying {@link StringBuffer} is not + *
    • This implementation assumes the underlying {@link StringBuilder} is not * changed during the use of this {@link Reader} implementation. *
    • This implementation is thread-safe. *
    • The implementation looks very much like {@link java.io.StringReader} (for * the right reasons). *
    • If one wants to reuse that instance, then the following needs to be done: *
      - * StringBuffer sb = new StringBuffer("some text");
      - * Reader reader = new StringBufferReader(sb);
      + * StringBuilder sb = new StringBuilder("some text");
      + * Reader reader = new StringBuilderReader(sb);
        * ... read from reader - don't close it ! ...
        * sb.setLength(0);
        * sb.append("some new text");
      @@ -45,23 +45,21 @@
        * 
      *
    */ -public class StringBufferReader extends Reader { +public class StringBuilderReader extends Reader { - // TODO (3.0): change to StringBuffer (including the name of the class) - - // The StringBuffer to read from. - private StringBuffer sb; + // The StringBuilder to read from. + private StringBuilder sb; // The length of 'sb'. private int length; - // The next position to read from the StringBuffer. + // The next position to read from the StringBuilder. private int next = 0; // The mark position. The default value 0 means the start of the text. private int mark = 0; - public StringBufferReader(StringBuffer sb) { + public StringBuilderReader(StringBuilder sb) { set(sb); } @@ -85,7 +83,7 @@ * * @param readAheadLimit Limit on the number of characters that may be read * while still preserving the mark. Because the stream's input comes - * from a StringBuffer, there is no actual limit, so this argument + * from a StringBuilder, there is no actual limit, so this argument * must not be negative, but is otherwise ignored. * @exception IllegalArgumentException If readAheadLimit is < 0 * @exception IOException If an I/O error occurs @@ -156,7 +154,7 @@ } } - public void set(StringBuffer sb) { + public void set(StringBuilder sb) { synchronized (lock) { this.sb = sb; length = sb.length(); Index: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java =================================================================== --- lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (revision 938459) +++ lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (working copy) @@ -22,7 +22,9 @@ import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; +import java.util.HashSet; import java.util.Properties; +import java.util.Set; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.lucene.benchmark.BenchmarkTestCase; @@ -97,6 +99,21 @@ } } + // class has to be public so that Class.forName.newInstance() will work + public static final class ThreadingDocMaker extends DocMaker { + + @Override + public Document makeDocument() throws Exception { + Document doc = new Document(); + String name = Thread.currentThread().getName(); + doc.add(new Field(BODY_FIELD, "body_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(TITLE_FIELD, "title_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(DATE_FIELD, "date_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + return doc; + } + + } + private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); private PerfRunData createPerfRunData(File file, boolean setBZCompress, @@ -225,5 +242,49 @@ br.close(); } } - + + public void testMultiThreaded() throws Exception { + File file = new File(getWorkDir(), "one-line"); + PerfRunData runData = createPerfRunData(file, false, null, ThreadingDocMaker.class.getName()); + final WriteLineDocTask wldt = new WriteLineDocTask(runData); + Thread[] threads = new Thread[10]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread("t" + i) { + @Override + public void run() { + try { + wldt.doLogic(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + } + + for (Thread t : threads) t.start(); + for (Thread t : threads) t.join(); + + wldt.close(); + + Set ids = new HashSet(); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8")); + try { + for (int i = 0; i < threads.length; i++) { + String line = br.readLine(); + String[] parts = line.split(Character.toString(WriteLineDocTask.SEP)); + assertEquals(3, parts.length); + // check that all thread names written are the same in the same line + String tname = parts[0].substring(parts[0].indexOf('_')); + ids.add(tname); + assertEquals(tname, parts[1].substring(parts[1].indexOf('_'))); + assertEquals(tname, parts[2].substring(parts[2].indexOf('_'))); + } + // only threads.length lines should exist + assertNull(br.readLine()); + assertEquals(threads.length, ids.size()); + } finally { + br.close(); + } + } + }