Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java (revision 501181) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java (working copy) @@ -13,6 +13,7 @@ import java.util.Iterator; import java.util.List; import java.util.Arrays; +import java.util.Locale; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; @@ -66,7 +67,7 @@ public static final String INDEX_DIR = "index"; //30-MAR-1987 14:22:36.87 - private static DateFormat format = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS"); + private static DateFormat format = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US); //DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.SHORT); static{ format.setLenient(true); Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java (revision 501181) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/Sample.java (working copy) @@ -51,6 +51,10 @@ // task to report RepSumByNameTask rep = new RepSumByNameTask(runData); top.addTask(rep); + + // print algorithm + System.out.println(top.toString()); + // execute top.doLogic(); } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (revision 501181) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (working copy) @@ -210,7 +210,13 @@ */ public void setNoChildReport() { 
letChildReport = false; + for (Iterator it = tasks.iterator(); it.hasNext();) { + PerfTask task = (PerfTask) it.next(); + if (task instanceof TaskSequence) { + ((TaskSequence)task).setNoChildReport(); } + } + } /** * Returns the rate per minute: how many operations should be performed in a minute. Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (revision 501181) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (working copy) @@ -23,24 +23,21 @@ import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Date; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import java.util.Locale; + import org.apache.lucene.benchmark.byTask.utils.Config; /** * A DocMaker using the Reuters collection for its input. 
*/ -public class ReutersDocMaker extends SimpleDocMaker { +public class ReutersDocMaker extends BasicDocMaker { private DateFormat dateFormat; private File dataDir = null; - private ArrayList txtFiles = new ArrayList(); + private ArrayList inputFiles = new ArrayList(); private int nextFile = 0; - private int round=0; - private int count = 0; + private int iteration=0; /* (non-Javadoc) * @see SimpleDocMaker#setConfig(java.util.Properties) @@ -49,48 +46,28 @@ super.setConfig(config); String d = config.get("docs.dir","reuters-out"); dataDir = new File(new File("work"),d); - addFiles(dataDir); - if (txtFiles.size()==0) { + collectFiles(dataDir,inputFiles); + if (inputFiles.size()==0) { throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); } // date format: 30-MAR-1987 14:22:36.87 - dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS"); + dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US); dateFormat.setLenient(true); } - private void addFiles(File f) { - if (!f.canRead()) { - return; - } - if (f.isDirectory()) { - File files[] = f.listFiles(); - for (int i = 0; i < files.length; i++) { - addFiles(files[i]); - } - return; - } - txtFiles.add(f); - addUniqueBytes(f.length()); - } - - /* (non-Javadoc) - * @see SimpleDocMaker#makeDocument() - */ - public Document makeDocument() throws Exception { + protected DocData getNextDocData() throws Exception { File f = null; String name = null; synchronized (this) { - f = (File) txtFiles.get(nextFile++); - name = f.getCanonicalPath()+"_"+round; - if (nextFile >= txtFiles.size()) { + f = (File) inputFiles.get(nextFile++); + name = f.getCanonicalPath()+"_"+iteration; + if (nextFile >= inputFiles.size()) { // exhausted files, start a new round nextFile = 0; - round++; + iteration++; } } - Document doc = new Document(); - doc.add(new Field("name",name,storeVal,indexVal,termVecVal)); BufferedReader reader = new BufferedReader(new FileReader(f)); String line = null; //First line is 
the date, 3rd is the title, rest is body @@ -98,27 +75,23 @@ reader.readLine();//skip an empty line String title = reader.readLine(); reader.readLine();//skip an empty line - StringBuffer body = new StringBuffer(1024); + StringBuffer bodyBuf = new StringBuffer(1024); while ((line = reader.readLine()) != null) { - body.append(line).append(' '); + bodyBuf.append(line).append(' '); } - Date date = dateFormat.parse(dateStr.trim()); - doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), - Field.Store.YES, Field.Index.UN_TOKENIZED)); - - if (title != null) { - doc.add(new Field("title", title, storeVal,indexVal,termVecVal)); - } - if (body.length() > 0) { - doc.add(new Field("body", body.toString(), storeVal,indexVal,termVecVal)); - } - - count++; + addBytes(f.length()); - return doc; + DocData dd = new DocData(); + + dd.date = dateFormat.parse(dateStr.trim()); + dd.name = name; + dd.title = title; + dd.body = bodyBuf.toString(); + return dd; } + /* * (non-Javadoc) * @see DocMaker#resetIinputs() @@ -126,8 +99,7 @@ public synchronized void resetInputs() { super.resetInputs(); nextFile = 0; - round = 0; - count = 0; + iteration = 0; } /* @@ -135,22 +107,7 @@ * @see DocMaker#numUniqueTexts() */ public int numUniqueTexts() { - return txtFiles.size(); + return inputFiles.size(); } - /* - * (non-Javadoc) - * @see DocMaker#getCount() - */ - public int getCount() { - return count; - } - - /* - * (non-Javadoc) - * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int) - */ - public Document makeDocument(int size) throws Exception { - throw new Exception(this+".makeDocument (int size) is not supported!"); - } } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (revision 501181) +++ 
contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (working copy) @@ -17,29 +17,13 @@ * limitations under the License. */ -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; - - /** * Create documents for the test */ -public class SimpleDocMaker implements DocMaker { +public class SimpleDocMaker extends BasicDocMaker { - static final String BODY_FIELD = "body"; private int docID = 0; - private long numBytes = 0; - private long numUniqueBytes = 0; - protected Config config; - private int nextDocTextPosition = 0; // for creating docs of fixed size. - - protected Field.Store storeVal = Field.Store.NO; - protected Field.Index indexVal = Field.Index.TOKENIZED; - protected Field.TermVector termVecVal = Field.TermVector.NO; - static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org "Well it may be a little dramatic but sometimes it true. 
" + "If you call the emergency medical services to an incident, " + @@ -52,100 +36,18 @@ "ones and the stranger whose life may depend on you being in the " + "right place at the right time with the right knowledge."; - private static int DOC_TEXT_LENGTH = DOC_TEXT.length(); - - /* - * (non-Javadoc) - * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument() - */ - public Document makeDocument () throws Exception { - return makeDocument(0); - } - - /* - * (non-Javadoc) - * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int) - */ - public Document makeDocument(int size) throws Exception { - int docid = newdocid(); - Document doc = new Document(); - doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal)); - String docText = createDocText(size); - doc.add(new Field(BODY_FIELD, "synthetic body text"+docid+" "+docText, storeVal, indexVal, termVecVal)); - addBytes(docText.length()); // should multiply by 2 here? - return doc; - } - - private synchronized int[] nextDocText(int fixedDocSize) { - int from = nextDocTextPosition; - int to = nextDocTextPosition; - int wraps = 0; - int size = 0; - - while (size 0) { - sb.append(DOC_TEXT.substring(from)); - from = 0; - } - sb.append(DOC_TEXT.substring(from,to)); - return sb.toString(); - } - // return a new docid private synchronized int newdocid() { return docID++; } - /* (non-Javadoc) - * @see DocMaker#setConfig(java.util.Properties) - */ - public void setConfig(Config config) { - this.config = config; - boolean stored = config.get("doc.stored",false); - boolean tokenized = config.get("doc.tokenized",true); - boolean termVec = config.get("doc.term.vector",false); - storeVal = (stored ? Field.Store.YES : Field.Store.NO); - indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED); - termVecVal = (termVec ? 
Field.TermVector.YES : Field.TermVector.NO); - } - /* * (non-Javadoc) * @see DocMaker#resetIinputs() */ public synchronized void resetInputs() { - printDocStatistics(); + super.resetInputs(); docID = 0; - numBytes = 0; } /* @@ -156,72 +58,12 @@ return 0; // not applicable } - /* - * (non-Javadoc) - * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes() - */ - public long numUniqueBytes() { - return numUniqueBytes; + protected DocData getNextDocData() { + DocData dd = new DocData(); + dd.body = DOC_TEXT; + dd.name = "doc"+newdocid(); + addBytes(DOC_TEXT.length()); + return dd; } - /* - * (non-Javadoc) - * @see DocMaker#getCount() - */ - public int getCount() { - return docID; - } - - /* - * (non-Javadoc) - * @see DocMaker#getByteCount() - */ - public long getByteCount() { - return numBytes; - } - - protected void addUniqueBytes (long n) { - numUniqueBytes += n; - } - - protected void addBytes (long n) { - numBytes += n; - } - - /* - * (non-Javadoc) - * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics() - */ - private int lastPrintedNumUniqueTexts = 0; - private long lastPrintedNumUniqueBytes = 0; - private int printNum = 0; - public void printDocStatistics() { - boolean print = false; - String col = " "; - StringBuffer sb = new StringBuffer(); - String newline = System.getProperty("line.separator"); - sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline); - int nut = numUniqueTexts(); - if (nut > lastPrintedNumUniqueTexts) { - print = true; - sb.append("total bytes of unique texts: ").append(Format.format(0,nut,col)).append(newline); - lastPrintedNumUniqueTexts = nut; - } - long nub = numUniqueBytes(); - if (nub > lastPrintedNumUniqueBytes) { - print = true; - sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline); - lastPrintedNumUniqueBytes = nub; - } - if (getCount()>0) { - print = true; - 
sb.append("num files added since last inputs reset: ").append(Format.format(0,getCount(),col)).append(newline); - sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline); - } - if (print) { - System.out.println(sb.append(newline).toString()); - printNum++; - } - } - } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (revision 0) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (revision 0) @@ -0,0 +1,210 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Locale; +import java.util.Properties; +import java.util.zip.GZIPInputStream; + +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.demo.html.HTMLParser; + + +/** + * A DocMaker using the (compressed) Trec collection for its input. + */ +public class TrecDocMaker extends BasicDocMaker { + + private static final String newline = System.getProperty("line.separator"); + + private DateFormat dateFormat; + private File dataDir = null; + private ArrayList inputFiles = new ArrayList(); + private int nextFile = 0; + private int iteration=0; + private BufferedReader reader; + private GZIPInputStream zis; + + /* (non-Javadoc) + * @see SimpleDocMaker#setConfig(java.util.Properties) + */ + public void setConfig(Config config) { + super.setConfig(config); + String d = config.get("docs.dir","trec"); + dataDir = new File(new File("work"),d); + collectFiles(dataDir,inputFiles); + if (inputFiles.size()==0) { + throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); + } + // date format: 30-MAR-1987 14:22:36.87 + dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy kk:mm:ss ",Locale.US); //Tue, 09 Dec 2003 22:39:08 GMT + dateFormat.setLenient(true); + } + + private void openNextFile() throws Exception { + closeInputs(); + int retries = 0; + while (retries<20) { + File f = null; + synchronized (this) { + f = (File) inputFiles.get(nextFile++); + if (nextFile >= inputFiles.size()) { + // exhausted files, start a new round + nextFile = 0; + iteration++; + } + } + System.out.println("opening: "+f+" length: "+f.length()); + try { + zis = new GZIPInputStream(new 
BufferedInputStream(new FileInputStream(f))); + break; + } catch (Exception e) { + retries++; + System.out.println("Skipping 'bad' file "+f.getAbsolutePath()+" #retries="+retries); + continue; + } + } + reader = new BufferedReader(new InputStreamReader(zis)); + } + + private void closeInputs() { + if (zis!=null) { + try { + zis.close(); + } catch (IOException e) { + System.out.println("closeInputs(): Ingnoring error: "+e); + e.printStackTrace(); + } + zis = null; + } + if (reader!=null) { + try { + reader.close(); + } catch (IOException e) { + System.out.println("closeInputs(): Ingnoring error: "+e); + e.printStackTrace(); + } + reader = null; + } + } + + // read until finding a line that starts with the specified prefix + private StringBuffer read (String prefix, StringBuffer sb, boolean collectMatchLine, boolean collectAll) throws Exception { + sb = (sb==null ? new StringBuffer() : sb); + String sep = ""; + while (true) { + String line = reader.readLine(); + if (line==null) { + openNextFile(); + continue; + } + if (line.startsWith(prefix)) { + if (collectMatchLine) { + sb.append(sep+line); + sep = newline; + } + break; + } + if (collectAll) { + sb.append(sep+line); + sep = newline; + } + } + //System.out.println("read: "+sb); + return sb; + } + + protected DocData getNextDocData() throws Exception { + if (reader==null) { + openNextFile(); + } + // 1. skip until doc start + read("",null,false,false); + // 2. name + StringBuffer sb = read("",null,true,false); + String name = sb.substring("".length()); + name = name.substring(0,name.indexOf(""))+"_"+iteration; + // 3. skip until doc header + read("",null,false,false); + // 4. date + sb = read("Date: ",null,true,false); + String dateStr = sb.substring("Date: ".length()); + // 5. skip until end of doc header + read("",null,false,false); + // 6. 
collect until end of doc + sb = read("",null,false,true); + // this is the next document, so parse it + HTMLParser p = new HTMLParser(new StringReader(sb.toString())); + // title + String title = p.getTitle(); + // properties + Properties props = p.getMetaTags(); + // body + Reader r = p.getReader(); + char c[] = new char[1024]; + StringBuffer bodyBuf = new StringBuffer(); + int n; + while ((n = r.read(c)) >= 0) { + if (n>0) { + bodyBuf.append(c,0,n); + } + } + addBytes(bodyBuf.length()); + + DocData dd = new DocData(); + + dd.date = dateFormat.parse(dateStr.trim()); + dd.name = name; + dd.title = title; + dd.body = bodyBuf.toString(); + dd.props = props; + return dd; + } + + + /* + * (non-Javadoc) + * @see DocMaker#resetIinputs() + */ + public synchronized void resetInputs() { + super.resetInputs(); + closeInputs(); + nextFile = 0; + iteration = 0; + } + + /* + * (non-Javadoc) + * @see DocMaker#numUniqueTexts() + */ + public int numUniqueTexts() { + return inputFiles.size(); + } + +} Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java ___________________________________________________________________ Name: svn:executable + * Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (revision 0) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (revision 0) @@ -0,0 +1,281 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.util.ArrayList; +import java.util.Date; +import java.util.Iterator; +import java.util.Properties; + +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.Format; + + +/** + * Create documents for the test. + * Maintains counters of chars etc. so that sub-classes just need to + * provide textual content, and the create-by-size is handled here. 
+ */ +public abstract class BasicDocMaker implements DocMaker { + + private int numDocsCreated = 0; + + static class DocData { + String name; + Date date; + String title; + String body; + Properties props; + } + + private static class LeftOver { + private DocData docdata; + private int cnt; + } + + // leftovers are thread local, because it is unsafe to share residues between threads + private ThreadLocal leftovr = new ThreadLocal(); + + static final String BODY_FIELD = "body"; + private long numBytes = 0; + private long numUniqueBytes = 0; + + protected Config config; + + protected Field.Store storeVal = Field.Store.NO; + protected Field.Index indexVal = Field.Index.TOKENIZED; + protected Field.TermVector termVecVal = Field.TermVector.NO; + + private synchronized int incrNumDocsCreated() { + return numDocsCreated++; + } + + /** + * Return the data of the next document. + * @return data of the next document. + * @exception if cannot create the next doc data + */ + protected abstract DocData getNextDocData() throws Exception; + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument() + */ + public Document makeDocument () throws Exception { + resetLeftovers(); + DocData docData = getNextDocData(); + Document doc = createDocument(docData,0,-1); + return doc; + } + + // create a doc + // use only part of the body, modify it to keep the rest (or use all if size==0). + // reset the docdata properties so they are not added more than once. + private Document createDocument(DocData docData, int size, int cnt) { + int docid = incrNumDocsCreated(); + Document doc = new Document(); + doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal)); + if (docData.name!=null) { + String name = (cnt<0 ? 
docData.name : docData.name+"_"+cnt); + doc.add(new Field("docname", name, storeVal, indexVal, termVecVal)); + } + if (docData.date!=null) { + String dateStr = DateTools.dateToString(docData.date, DateTools.Resolution.SECOND); + doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal)); + } + if (docData.title!=null) { + doc.add(new Field("doctitle", docData.title, storeVal, indexVal, termVecVal)); + } + if (docData.body!=null && docData.body.length()>0) { + String bdy; + if (size<=0 || size>=docData.body.length()) { + bdy = docData.body; // use all + docData.body = ""; // nothing left + } else { + // attempt not to break words - if whitespace found within next 20 chars... + for (int n=size-1; n ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline); + int nut = numUniqueTexts(); + if (nut > lastPrintedNumUniqueTexts) { + print = true; + sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline); + lastPrintedNumUniqueTexts = nut; + } + long nub = numUniqueBytes(); + if (nub > lastPrintedNumUniqueBytes) { + print = true; + sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline); + lastPrintedNumUniqueBytes = nub; + } + if (getCount()>0) { + print = true; + sb.append("num docs added since last inputs reset: ").append(Format.format(0,getCount(),col)).append(newline); + sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline); + } + if (print) { + System.out.println(sb.append(newline).toString()); + printNum++; + } + } + + protected void collectFiles(File f, ArrayList inputFiles) { + if (!f.canRead()) { + return; + } + if (f.isDirectory()) { + File files[] = f.listFiles(); + for (int i = 0; i < files.length; i++) { + collectFiles(files[i],inputFiles); + } + return; + } + inputFiles.add(f); + addUniqueBytes(f.length()); + } + + +} Property changes on: 
contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java ___________________________________________________________________ Name: svn:executable + * Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (revision 501181) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (working copy) @@ -27,9 +27,18 @@ * Run the benchmark algorithm. *

Usage: java Benchmark algorithm-file *

    - *
  1. Read algorithm. - *
  2. Run the algorithm. + *
  3. Read algorithm.
  4. + *
  5. Run the algorithm.
  6. *
+ * Things to be added/fixed in "Benchmarking by tasks": + *
    + *
  1. TODO - report into Excel and/or graphed view.
  2. + *
  3. TODO - perf comparison between Lucene releases over the years.
  4. + *
  5. TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)
  6. + *
  7. TODO - add overall time control for repeated execution (vs. current by-count only).
  8. + *
  9. TODO - query maker that is based on index statistics.
  10. + *
  11. TODO - properties documentation - each task should document the properties it relies on.
  12. + *
*/ public class Benchmark { Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (revision 501181) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (working copy) @@ -44,7 +44,7 @@ feeds - Sources foe benchmark inputs: documents and queries. + Sources for benchmark inputs: documents and queries. utils @@ -92,7 +92,7 @@
- would run the compound-penalty.alg "algorithm".
  • ant run-task -Dtask.alg=[full-path-to-your-alg-file] -
    - would run the your perf test "algorithm". +
    - would run your perf test "algorithm".
  • java org.apache.lucene.benchmark.byTask.programmatic.Sample
    - would run a performance test programmatically - without using an alg file. @@ -109,7 +109,7 @@

    Each benchmark run has a DocMaker and a QueryMaker. These two should usually match, so that "meaningful" queries are used for a certain collection. -Properties defined at the header of the alg file define which "makers" should be used. +Properties set at the header of the alg file define which "makers" should be used. You can also specify your own makers, implementing the DocMaker and QureyMaker interfaces.

    @@ -275,8 +275,8 @@
    This increments a global "round counter". All task runs that would start now would record the new, updated round counter as their round number. This would appear in reports. In particular, see RepSumByNameRound above. -
    An additional effect of NewRound, is that numeric and boolean properties defined in the - .properties file as a sequence of values, e.g. merge.factor=mrg:10:100:10:100 would +
    An additional effect of NewRound, is that numeric and boolean properties defined (at the head + of the .alg file) as a sequence of values, e.g. merge.factor=mrg:10:100:10:100 would increment (cyclic) to the next value. Note: this would also be reflected in the reports, in this case under a column that would be named "mrg".
  • @@ -368,7 +368,7 @@ (Make sure it is no shorter than any value in the sequence).