Index: conf/micro-standard.alg =================================================================== --- conf/micro-standard.alg (revision 519567) +++ conf/micro-standard.alg (working copy) @@ -64,9 +64,9 @@ { "SrchNewRdr" Search > : 500 - { "SrchTrvNewRdr" SearchTrav > : 300 + { "SrchTrvNewRdr" SearchTrav(1000) > : 300 - { "SrchTrvRetNewRdr" SearchTravRet > : 100 + { "SrchTrvRetNewRdr" SearchTravRet(2000) > : 100 NewRound Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 519567) +++ CHANGES.txt (working copy) @@ -4,6 +4,16 @@ $Id:$ +3/19/07 + +1. Introduced an AbstractQueryMaker to hold common QueryMaker code. (GSI) +2. Added traversalSize parameter to SearchTravRetTask and SearchTravTask. Changed SearchTravRetTask to extend SearchTravTask. (GSI) +3. Added FileBasedQueryMaker to run queries from a File or resource. (GSI) +4. Modified query-maker generation for read related tasks so that adding further read tasks is simpler and safer. (DC) +5. Changed Tasks' setParams() to throw UnsupportedOperationException if the task does not support command line params. (DC) +6. Improved javadoc to specify all properties and command line params currently supported. (DC) +7. Refactored ReportTasks so that it is now easy to create new report tasks. (DC) + 01/09/07 1. Committed Doron Cohen's benchmarking contribution, which provides an easily expandable task based approach to benchmarking. See the javadocs for information. (Doron Cohen via Grant Ingersoll) Index: src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 0) +++ src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 0) @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.benchmark.byTask; + +import java.io.StringReader; + +import org.apache.lucene.benchmark.byTask.Benchmark; +import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; + +import junit.framework.TestCase; + +/** + * Test very simply that perf tasks - simple algorithms - are doing what they should.
+ */ +public class TestPerfTasksLogic extends TestCase { + + private static final boolean DEBUG = true; + static final String NEW_LINE = System.getProperty("line.separator"); + + // properties in effect in all tests here + static final String propLines [] = { + "directory=RAMDirectory", + "print.props=false", + }; + + /** + * @param name test name + */ + public TestPerfTasksLogic(String name) { + super(name); + } + + /** + * Test index creation logic + */ + public void testPopulateIndex() throws Exception { + String algLines[] = { + "ResetSystemErase", + "CreateIndex", + "{ AddDoc } : 1000", + "Optimize", + "CloseIndex", + "OpenReader", + "{ CountingSearchTest } : 200", + "CloseReader", + "[ CountingSearchTest > : 70", + "[ CountingSearchTest > : 9", + }; + String algText = algLinesToText(algLines); + logTstLogic(algText); + Benchmark benchmark = new Benchmark(new StringReader(algText)); + CountingSearchTestTask.numSearches = 0; + benchmark.execute(); + assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches); + assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); + // now we should be able to open the index for write. + IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false); + iw.close(); + IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory()); + assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs()); + } + + // catenate alg lines to make the alg text + private String algLinesToText(String[] algLines) { + String indent = " "; + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < propLines.length; i++) { + sb.append(indent).append(propLines[i]).append(NEW_LINE); + } + for (int i = 0; i < algLines.length; i++) { + sb.append(indent).append(algLines[i]).append(NEW_LINE); + } + return sb.toString(); + } + + private void logTstLogic (String txt) { + if (!DEBUG) + return; + System.out.println("Test logic of:"); + System.out.println(txt); + } + +} Property changes on: src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java ___________________________________________________________________ Name: svn:executable + * Name: svn:eol-style + native Index: src/test/org/apache/lucene/benchmark/byTask/tasks/CountingSearchTestTask.java =================================================================== --- src/test/org/apache/lucene/benchmark/byTask/tasks/CountingSearchTestTask.java (revision 0) +++ src/test/org/apache/lucene/benchmark/byTask/tasks/CountingSearchTestTask.java (revision 0) @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.benchmark.byTask.tasks; + +import org.apache.lucene.benchmark.byTask.PerfRunData; + +/** + * Test Search task which counts number of searches. + */ +public class CountingSearchTestTask extends SearchTask { + + public static int numSearches = 0; + + public CountingSearchTestTask(PerfRunData runData) { + super(runData); + } + + public int doLogic() throws Exception { + int res = super.doLogic(); + incrNumSearches(); + return res; + } + + private static synchronized void incrNumSearches() { + numSearches++; + } + +} Property changes on: src/test/org/apache/lucene/benchmark/byTask/tasks/CountingSearchTestTask.java ___________________________________________________________________ Name: svn:executable + * Name: svn:eol-style + native Index: src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java =================================================================== --- src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java (revision 0) +++ src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java (revision 0) @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.benchmark.byTask; + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Iterator; + +import org.apache.lucene.benchmark.byTask.tasks.PerfTask; +import org.apache.lucene.benchmark.byTask.tasks.TaskSequence; +import org.apache.lucene.benchmark.byTask.utils.Algorithm; + +import junit.framework.TestCase; + +/** + * Test very simply that perf tasks are parses as expected. + */ +public class TestPerfTasksParse extends TestCase { + + private static final boolean DEBUG = true; + static final String NEW_LINE = System.getProperty("line.separator"); + + /* + * All known tasks. + * As new tasks are added, add them here. + * It would be nice to do that automatically, unfortunately + * Java does not provide a "get all classes in package" or + * "get all sub-classes" functionality. + */ + static String singleTaskAlgs [] = { + " AddDoc ", + " AddDoc(1000.0) ", // for numeruc params use double form, just for this test. 
+ " ClearStats ", + " CloseIndex ", + " CloseReader ", + " CreateIndex ", + " DeleteDoc ", + " DeleteDoc(500.0) ", + " NewRound ", + " OpenIndex ", + " OpenReader ", + " Optimize ", + //" Perf ", // not a usable task + //" Read ", // not a usable task + " RepAll ", + " RepSelectByPref prefix ", + " RepSumByNameRound ", + " RepSumByName ", + " RepSumByPrefRound prefix ", + " RepSumByPref prefix ", + //" Report ", // not a usable task + " ResetInputs ", + " ResetSystemErase ", + " ResetSystemSoft ", + " Search ", + " SearchTravRet ", + " SearchTravRet(100.0) ", + " SearchTrav ", + " SearchTrav(50.0) ", + " SetProp ", + " SetProp(name,value) ", + //" TaskSequence.java ", // not an explicitely usable task + " Warm ", + }; + + + /** + * @param name test name + */ + public TestPerfTasksParse(String name) { + super(name); + } + + /** + * Test the parsing of very simple tasks, for all tasks + */ + public void testAllTasksSimpleParse() { + doTestAllTasksSimpleParse(false,false); + } + + /** + * Test the parsing of simple sequential sequences, for all tasks + */ + public void testAllTasksSimpleParseSequntial() { + doTestAllTasksSimpleParse(true,false); + } + + /** + * Test the parsing of simple parallel sequences, for all tasks + */ + public void testAllTasksSimpleParseParallel() { + doTestAllTasksSimpleParse(true,true); + } + + // utility for simple parsing testing of all tasks. + private void doTestAllTasksSimpleParse(boolean parOrSeq, boolean par) { + String propline = "print.props=false" + NEW_LINE; + for (int i = 0; i < singleTaskAlgs.length; i++) { + String testedTask = singleTaskAlgs[i]; + if (parOrSeq) { + if (par) { + testedTask = "[ " + testedTask + " ] : 2"; + } else { + testedTask = "{ " + testedTask + " } : 3"; + } + } + try { + log(testedTask); + Benchmark benchmark = new Benchmark(new StringReader(propline+testedTask)); + Algorithm alg = benchmark.getAlgorithm(); + ArrayList algTasks = alg.extractTasks(); + // must find a task with this name in the algorithm + boolean foundName = false; + boolean foundPar = false; + String theTask = singleTaskAlgs[i].replaceAll(" +"," ").trim(); + for (Iterator iter = algTasks.iterator(); iter.hasNext();) { + PerfTask task = (PerfTask) iter.next(); + foundName |= (task.toString().indexOf(theTask)>=0); + foundPar |= (task instanceof TaskSequence && ((TaskSequence)task).isParallel()); + } + assertTrue("Task "+testedTask+" was not found in "+alg.toString(),foundName); + if (parOrSeq) { + if (par) { + assertTrue("Task "+testedTask+" was supposed to be parallel in "+alg.toString(),foundPar); + } else { + assertFalse("Task "+testedTask+" was not supposed to be parallel in "+alg.toString(),foundPar); + } + } + } catch (Exception e) { + System.out.flush(); + e.printStackTrace(); + fail(e.getMessage()); + } + } + } + + /** + * Test the repetiotion parsing for parallel tasks + */ + public void testParseParallelTaskSequenceRepetition() throws Exception { + String propline = "print.props=false" + NEW_LINE; + String taskStr = "AddDoc"; + String parsedTasks = "[ "+taskStr+" ] : 1000"; + Benchmark benchmark = new Benchmark(new StringReader(propline+parsedTasks)); + Algorithm alg = benchmark.getAlgorithm(); + ArrayList algTasks = alg.extractTasks(); + boolean foundAdd = false; + for (Iterator iter = algTasks.iterator(); iter.hasNext();) { + PerfTask task = (PerfTask) iter.next(); + if (task.toString().indexOf(taskStr)>=0) { + foundAdd = true; + } + if (task instanceof TaskSequence) { + assertEquals("repetions should be 1000 for "+parsedTasks, 1000, ((TaskSequence) 
task).getRepetitions()); + assertTrue("sequence for "+parsedTasks+" should be parallel!", ((TaskSequence) task).isParallel()); + } + assertTrue("Task "+taskStr+" was not found in "+alg.toString(),foundAdd); + } + } + + /** + * Test the repetiotion parsing for sequential tasks + */ + public void testParseTaskSequenceRepetition() throws Exception { + String propline = "print.props=false" + NEW_LINE; + String taskStr = "AddDoc"; + String parsedTasks = "{ "+taskStr+" } : 1000"; + Benchmark benchmark = new Benchmark(new StringReader(propline+parsedTasks)); + Algorithm alg = benchmark.getAlgorithm(); + ArrayList algTasks = alg.extractTasks(); + boolean foundAdd = false; + for (Iterator iter = algTasks.iterator(); iter.hasNext();) { + PerfTask task = (PerfTask) iter.next(); + if (task.toString().indexOf(taskStr)>=0) { + foundAdd = true; + } + if (task instanceof TaskSequence) { + assertEquals("repetions should be 1000 for "+parsedTasks, 1000, ((TaskSequence) task).getRepetitions()); + assertFalse("sequence for "+parsedTasks+" should be sequential!", ((TaskSequence) task).isParallel()); + } + assertTrue("Task "+taskStr+" was not found in "+alg.toString(),foundAdd); + } + } + + private void log (String txt) { + if (DEBUG) System.out.println("Test parsing of: "+txt); + } + +} Property changes on: src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java ___________________________________________________________________ Name: svn:executable + * Name: svn:eol-style + native Index: src/java/org/apache/lucene/benchmark/byTask/utils/Config.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (working copy) @@ -19,9 +19,8 @@ import java.io.BufferedReader; import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileReader; import java.io.IOException; +import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -49,10 +48,10 @@ * @param algFile file containing both algorithm and config properties. * @throws IOException */ - public Config (File algFile) throws IOException { + public Config (Reader algReader) throws IOException { // read alg file to array of lines ArrayList lines = new ArrayList(); - BufferedReader r = new BufferedReader(new FileReader(algFile)); + BufferedReader r = new BufferedReader(algReader); int lastConfigLine=0; for (String line = r.readLine(); line!=null; line=r.readLine()) { lines.add(line); Index: src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (working copy) @@ -223,7 +223,32 @@ public void execute() throws Exception { sequence.doLogic(); } + + /** + * Expert: for test purposes, return all tasks participating in this algorithm. + * @return all tasks participating in this algorithm. 
+ */ + public ArrayList extractTasks() { + ArrayList res = new ArrayList(); + extractTasks(res, sequence); + return res; + } + private void extractTasks (ArrayList extrct, TaskSequence seq) { + if (seq==null) + return; + extrct.add(seq); + ArrayList t = seq.getTasks(); + if (t==null) + return; + for (int i = 0; i < t.size(); i++) { + PerfTask p = (PerfTask) t.get(i); + if (p instanceof TaskSequence) { + extractTasks(extrct, (TaskSequence)p); + } else { + extrct.add(p); + } + } + } - } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java (working copy) @@ -18,15 +18,16 @@ */ import org.apache.lucene.benchmark.byTask.PerfRunData; -import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; /** * Search and Traverse and Retrieve docs task. * *

Note: This task reuses the reader if it is already open. * Otherwise a reader is opened at start and closed at the end. + * + * Takes optional param: traversal size (otherwise all results are traversed). */ -public class SearchTravRetTask extends ReadTask { +public class SearchTravRetTask extends SearchTravTask { public SearchTravRetTask(PerfRunData runData) { super(runData); @@ -36,21 +37,4 @@ return true; } - public boolean withSearch() { - return true; - } - - public boolean withTraverse() { - return true; - } - - public boolean withWarm() { - return false; - } - - public QueryMaker getQueryMaker() { - return getRunData().getSearchTravQueryMaker(); - } - - } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (working copy) @@ -23,11 +23,19 @@ /** - * Add a document, optionally with of a cetrain size. - * Other side effects: none. + * Add a document, optionally with of a certain size. + *
Other side effects: none. + *
Relevant properties: doc.add.log.step. + *
Takes optional param: document size. */ public class AddDocTask extends PerfTask { + /** + * Default value for property doc.add.log.step - indicating how often + * an "added N docs" message should be logged. + */ + public static final int DEFAULT_ADD_DOC_LOG_STEP = 500; + public AddDocTask(PerfRunData runData) { super(runData); } @@ -70,10 +78,10 @@ private void log (int count) { if (logStep<0) { // avoid sync although race possible here - logStep = getRunData().getConfig().get("doc.add.log.step",500); + logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP); } if (logStep>0 && (count%logStep)==0) { - System.out.println("--> processed "+count+" docs"); + System.out.println("--> processed (add) "+count+" docs"); } } @@ -85,4 +93,12 @@ super.setParams(params); docSize = (int) Float.parseFloat(params); } + + /* (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams() + */ + public boolean supportsParams() { + return true; + } + } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java (working copy) @@ -17,12 +17,17 @@ * limitations under the License. */ +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; + import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Report; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; /** * Report all statistics grouped/aggregated by name and round. - * Other side effects: None. + *
Other side effects: None. */ public class RepSumByNameRoundTask extends ReportTask { @@ -31,7 +36,7 @@ } public int doLogic() throws Exception { - Report rp = getRunData().getPoints().reportSumByNameRound(); + Report rp = reportSumByNameRound(getRunData().getPoints().taskStats()); System.out.println(); System.out.println("------------> Report Sum By (any) Name and Round ("+ @@ -42,4 +47,35 @@ return 0; } + /** + * Report statistics as a string, aggregate for tasks named the same, and from the same round. + * @return the report + */ + protected Report reportSumByNameRound(List taskStats) { + // aggregate by task name and round + LinkedHashMap p2 = new LinkedHashMap(); + int reported = 0; + for (Iterator it = taskStats.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0) { // consider only tasks that ended + reported++; + String name = stat1.getTask().getName(); + String rname = stat1.getRound()+"."+name; // group by round + TaskStats stat2 = (TaskStats) p2.get(rname); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(rname,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genPartialReport(reported, p2, taskStats.size()); + } + } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java (working copy) @@ -17,12 +17,16 @@ * limitations under the License. */ +import java.util.Iterator; +import java.util.List; + import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Report; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; /** * Report all statistics with no aggregations. - * Other side effects: None. + *
Other side effects: None. */ public class RepAllTask extends ReportTask { @@ -31,7 +35,7 @@ } public int doLogic() throws Exception { - Report rp = getRunData().getPoints().reportAll(); + Report rp = reportAll(getRunData().getPoints().taskStats()); System.out.println(); System.out.println("------------> Report All ("+rp.getSize()+" out of "+rp.getOutOf()+")"); @@ -39,5 +43,36 @@ System.out.println(); return 0; } + + /** + * Report detailed statistics as a string + * @return the report + */ + protected Report reportAll(List taskStats) { + String longestOp = longestOp(taskStats.iterator()); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longestOp)); + sb.append(newline); + int reported = 0; + Iterator it = taskStats.iterator(); + while (it.hasNext()) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0) { // consider only tasks that ended + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longestOp, stat); + reported++; + if (taskStats.size()>2 && reported%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); + return new Report(reptxt,reported,reported,taskStats.size()); + } } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java (working copy) @@ -17,12 +17,17 @@ * limitations under the License. */ +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; + import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Report; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; /** * Report by-name-prefix statistics aggregated by name. - * Other side effects: None. + *
Other side effects: None. */ public class RepSumByPrefTask extends ReportTask { @@ -33,7 +38,7 @@ protected String prefix; public int doLogic() throws Exception { - Report rp = getRunData().getPoints().reportSumByPrefix(prefix); + Report rp = reportSumByPrefix(getRunData().getPoints().taskStats()); System.out.println(); System.out.println("------------> Report Sum By Prefix ("+prefix+") ("+ @@ -44,6 +49,33 @@ return 0; } + protected Report reportSumByPrefix (List taskStats) { + // aggregate by task name + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = taskStats.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + String name = stat1.getTask().getName(); + TaskStats stat2 = (TaskStats) p2.get(name); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(name,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genPartialReport(reported, p2, taskStats.size()); + } + + public void setPrefix(String prefix) { this.prefix = prefix; } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java (working copy) @@ -24,7 +24,7 @@ /** * Reset all index and input data and call gc, does NOT erase index/dir, does NOT clear statistics. * This contains ResetInputs. - * Other side effects: writers/readers nulified, closed. + *
Other side effects: writers/readers nullified, closed. * Index is NOT erased. * Directory is NOT erased. */ Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java (working copy) @@ -25,7 +25,7 @@ /** * Open an index reader. - * Other side effects: index redaer object in perfRunData is set. + *
Other side effects: index reader object in perfRunData is set. */ public class OpenReaderTask extends PerfTask { Index: src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java (working copy) @@ -21,10 +21,24 @@ /** * Delete a document by docid. - * Other side effects: none. + *
Other side effects: none. + *
Relevant properties: doc.delete.log.step , doc.delete.step. + *
If no docid param is supplied, deletes doc with id = last-deleted-doc + doc.delete.step. + *
Takes optional param: document id. */ public class DeleteDocTask extends PerfTask { + /** + * Gap between ids of deleted docs, applies when no docid param is provided. + */ + public static final int DEFAULT_DOC_DELETE_STEP = 8; + + /** + * Default value for property doc.delete.log.step - indicating how often + * an "deleted N docs" message should be logged. + */ + public static final int DEFAULT_DELETE_DOC_LOG_STEP = 500; + public DeleteDocTask(PerfRunData runData) { super(runData); } @@ -50,10 +64,10 @@ super.setup(); // one time static initializations if (logStep<0) { - logStep = getRunData().getConfig().get("doc.delete.log.step",500); + logStep = getRunData().getConfig().get("doc.delete.log.step",DEFAULT_DELETE_DOC_LOG_STEP); } if (deleteStep<0) { - deleteStep = getRunData().getConfig().get("doc.delete.step",8); + deleteStep = getRunData().getConfig().get("doc.delete.step",DEFAULT_DOC_DELETE_STEP); } // set the docid to be deleted docid = (byStep ? lastDeleted + deleteStep : docid); @@ -69,7 +83,7 @@ private void log (int count) { if (logStep>0 && (count%logStep)==0) { - System.out.println("--> processed "+count+" docs, last deleted: "+lastDeleted); + System.out.println("--> processed (delete) "+count+" docs, last deleted: "+lastDeleted); } } @@ -82,5 +96,12 @@ docid = (int) Float.parseFloat(params); byStep = (docid < 0); } + + /* (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams() + */ + public boolean supportsParams() { + return true; + } } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java (working copy) @@ -24,8 +24,8 @@ /** * Close index reader. - * Other side effects: index reader in perfRunData is nullified. - * This would cause read related tasks to reopen their own reader. + *
Other side effects: index reader in perfRunData is nullified. + *
This would cause read related tasks to reopen their own reader. */ public class CloseReaderTask extends PerfTask { Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (working copy) @@ -28,10 +28,14 @@ /** * Open an index writer. - * Other side effects: index writer object in perfRunData is set. + *
Other side effects: index writer object in perfRunData is set. + *
Relevant properties: merge.factor , max.buffered. */ public class OpenIndexTask extends PerfTask { + public static final int DEFAULT_MAX_BUFFERED = 10; + public static final int DEFAULT_MERGE_PFACTOR = 10; + public OpenIndexTask(PerfRunData runData) { super(runData); } @@ -44,8 +48,8 @@ Config config = getRunData().getConfig(); boolean cmpnd = config.get("compound",true); - int mrgf = config.get("merge.factor",10); - int mxbf = config.get("max.buffered",10); + int mrgf = config.get("merge.factor",DEFAULT_MERGE_PFACTOR); + int mxbf = config.get("max.buffered",DEFAULT_MAX_BUFFERED); // must update params for newly opened writer writer.setMaxBufferedDocs(mxbf); Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (working copy) @@ -28,7 +28,8 @@ /** * Create an index. - * Other side effects: index writer object in perfRunData is set. + *
Other side effects: index writer object in perfRunData is set. + *
Relevant properties: merge.factor , max.buffered. */ public class CreateIndexTask extends PerfTask { @@ -45,8 +46,8 @@ Config config = getRunData().getConfig(); boolean cmpnd = config.get("compound",true); - int mrgf = config.get("merge.factor",10); - int mxbf = config.get("max.buffered",10); + int mrgf = config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR); + int mxbf = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED); iw.setUseCompoundFile(cmpnd); iw.setMergeFactor(mrgf); Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java (working copy) @@ -17,12 +17,17 @@ * limitations under the License. */ +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; + import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Report; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; /** * Report all prefix matching statistics grouped/aggregated by name and round. - * Other side effects: None. + *
Other side effects: None. */ public class RepSumByPrefRoundTask extends RepSumByPrefTask { @@ -31,7 +36,7 @@ } public int doLogic() throws Exception { - Report rp = getRunData().getPoints().reportSumByPrefixRound(prefix); + Report rp = reportSumByPrefixRound(getRunData().getPoints().taskStats()); System.out.println(); System.out.println("------------> Report sum by Prefix ("+prefix+") and Round ("+ @@ -42,4 +47,32 @@ return 0; } + protected Report reportSumByPrefixRound(List taskStats) { + // aggregate by task name and by round + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = taskStats.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + String name = stat1.getTask().getName(); + String rname = stat1.getRound()+"."+name; // group by round + TaskStats stat2 = (TaskStats) p2.get(rname); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(rname,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genPartialReport(reported, p2, taskStats.size()); + } + + } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (working copy) @@ -30,6 +30,8 @@ *
* Tasks performing some work that should be measured for the task, can overide setup() and/or tearDown() and * placed that work there. + *
+ * Relevant properties: task.max.depth.log. */ public abstract class PerfTask implements Cloneable { @@ -39,7 +41,7 @@ private String name; private int depth = 0; private int maxDepthLogStart = 0; - protected String params = null; + private String params = null; protected static final String NEW_LINE = System.getProperty("line.separator"); @@ -200,13 +202,24 @@ } /** + * Subclasses that support parameters must override this method to return true. + * @return true iff this task supports command line params. + */ + public boolean supportsParams () { + return false; + } + + /** * Set the params of this task. - * Sub classes that supports parameters may overide this method for fetching/processing the params. + * @exception UnsupportedOperationException if this task does not support command line parameters. */ public void setParams(String params) { + if (!supportsParams()) { + throw new UnsupportedOperationException(getName()+" does not support command line parameters."); + } this.params = params; } - + /** * @return Returns the Params. */ Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SetPropTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SetPropTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SetPropTask.java (working copy) @@ -24,7 +24,8 @@ * A property may have a single value, or a sequence of values, seprated by ":". * If a sequence of values is specified, each time a new round starts, * the next (cyclic) value is taken. - * Other side effects: none. + *
Other side effects: none. + *
Takes mandatory param: "name,value" pair. * @see org.apache.lucene.benchmark.byTask.tasks.NewRoundTask */ public class SetPropTask extends PerfTask { @@ -55,4 +56,11 @@ value = params.substring(k+1).trim(); } + /* (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams() + */ + public boolean supportsParams() { + return true; + } + } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (working copy) @@ -24,7 +24,7 @@ /** * Close index writer. - * Other side effects: index writer object in perfRunData is nullified. + *
Other side effects: index writer object in perfRunData is nullified. */ public class CloseIndexTask extends PerfTask { Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java (working copy) @@ -17,12 +17,16 @@ * limitations under the License. */ +import java.util.Iterator; +import java.util.List; + import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Report; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; /** * Report by-name-prefix statistics with no aggregations. - * Other side effects: None. + *
Other side effects: None. */ public class RepSelectByPrefTask extends RepSumByPrefTask { @@ -31,7 +35,7 @@ } public int doLogic() throws Exception { - Report rp = getRunData().getPoints().reportSelectByPrefix(prefix); + Report rp = reportSelectByPrefix(getRunData().getPoints().taskStats()); System.out.println(); System.out.println("------------> Report Select By Prefix ("+prefix+") ("+ @@ -41,4 +45,31 @@ return 0; } + + protected Report reportSelectByPrefix(List taskStats) { + String longestOp = longestOp(taskStats.iterator()); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longestOp)); + sb.append(newline); + int reported = 0; + for (Iterator it = taskStats.iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0 && stat.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longestOp,stat); + if (taskStats.size()>2 && reported%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); + return new Report(reptxt,reported,reported, taskStats.size()); + } + } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java (working copy) @@ -22,7 +22,7 @@ /** * Optimize the index. - * Other side effects: none. + *
Other side effects: none. */ public class OptimizeTask extends PerfTask { Index: src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java (working copy) @@ -22,7 +22,7 @@ /** * Increment the counter for properties maintained by Round Number. - * Other side effects: if there are props by round number, log value change. + *
Other side effects: if there are props by round number, log value change. */ public class NewRoundTask extends PerfTask { Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java (working copy) @@ -25,8 +25,12 @@ * *

Note: This task reuses the reader if it is already open. * Otherwise a reader is opened at start and closed at the end. + *

+ * + * Takes optional param: traversal size (otherwise all results are traversed). */ public class SearchTravTask extends ReadTask { + protected int traversalSize = Integer.MAX_VALUE; public SearchTravTask(PerfRunData runData) { super(runData); @@ -48,8 +52,25 @@ return false; } + + public QueryMaker getQueryMaker() { - return getRunData().getSearchTravRetQueryMaker(); + return getRunData().getQueryMaker(this); } + public int traversalSize() { + return traversalSize; + } + + public void setParams(String params) { + super.setParams(params); + traversalSize = (int)Float.parseFloat(params); + } + + /* (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams() + */ + public boolean supportsParams() { + return true; + } } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java (working copy) @@ -21,7 +21,7 @@ /** * Clear statistics data. - * Other side effects: None. + *
Other side effects: None. */ public class ClearStatsTask extends PerfTask { Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (working copy) @@ -49,7 +49,7 @@ } public QueryMaker getQueryMaker() { - return getRunData().getSearchQueryMaker(); + return getRunData().getQueryMaker(this); } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java (working copy) @@ -17,12 +17,17 @@ * limitations under the License. */ +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; + import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Report; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; /** * Report all statistics aggregated by name. - * Other side effects: None. + *
Other side effects: None. */ public class RepSumByNameTask extends ReportTask { @@ -31,7 +36,7 @@ } public int doLogic() throws Exception { - Report rp = getRunData().getPoints().reportSumByName(); + Report rp = reportSumByName(getRunData().getPoints().taskStats()); System.out.println(); System.out.println("------------> Report Sum By (any) Name ("+ @@ -42,4 +47,35 @@ return 0; } + /** + * Report statistics as a string, aggregate for tasks named the same. + * @return the report + */ + protected Report reportSumByName(List taskStats) { + // aggregate by task name + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = taskStats.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0) { // consider only tasks that ended + reported++; + String name = stat1.getTask().getName(); + TaskStats stat2 = (TaskStats) p2.get(name); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(name,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genPartialReport(reported, p2, taskStats.size()); + } + + } Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java (working copy) @@ -23,8 +23,8 @@ /** * Reset all index and input data and call gc, erase index and dir, does NOT clear statistics. - * This contains ResetInputs. - * Other side effects: writers/readers nulified, deleted, closed. + *
This contains ResetInputs. + *
Other side effects: writers/readers nulified, deleted, closed. * Index is erased. * Directory is erased. */ Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (working copy) @@ -76,9 +76,10 @@ Hits hits = searcher.search(q); //System.out.println("searched: "+q); - if (withTraverse()) { + if (withTraverse() && hits!=null) { Document doc = null; - if (hits != null && hits.length() > 0) { + int traversalSize = Math.min(hits.length(), traversalSize()); + if (traversalSize > 0) { for (int m = 0; m < hits.length(); m++) { int id = hits.id(m); res++; @@ -121,6 +122,18 @@ public abstract boolean withTraverse (); /** + * Specify the number of hits to traverse. Tasks should override this if they want to restrict the number + * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0. + * + * Read task calculates the traversal as: Math.min(hits.length(), traversalSize()) + * @return Integer.MAX_VALUE + */ + public int traversalSize() + { + return Integer.MAX_VALUE; + } + + /** * Return true if, with search & results traversing, docs should be retrieved. */ public abstract boolean withRetrieve (); Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (working copy) @@ -1,6 +1,12 @@ package org.apache.lucene.benchmark.byTask.tasks; +import java.util.Iterator; +import java.util.LinkedHashMap; + import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.stats.Report; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; +import org.apache.lucene.benchmark.byTask.utils.Format; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -41,4 +47,116 @@ protected boolean shouldNotRecordStats() { return true; } + + /* + * From here start the code used to generate the reports. + * Subclasses would use this part to generate reports. + */ + + protected static final String newline = System.getProperty("line.separator"); + + /** + * Get a textual summary of the benchmark results, average from all test runs. + */ + protected static final String OP = "Operation "; + protected static final String ROUND = " round"; + protected static final String RUNCNT = " runCnt"; + protected static final String RECCNT = " recsPerRun"; + protected static final String RECSEC = " rec/s"; + protected static final String ELAPSED = " elapsedSec"; + protected static final String USEDMEM = " avgUsedMem"; + protected static final String TOTMEM = " avgTotalMem"; + protected static final String COLS[] = { + RUNCNT, + RECCNT, + RECSEC, + ELAPSED, + USEDMEM, + TOTMEM + }; + + /** + * Compute a title line for a report table + * @param longestOp size of longest op name in the table + * @return the table title line. 
+ */ + protected String tableTitle (String longestOp) { + StringBuffer sb = new StringBuffer(); + sb.append(Format.format(OP,longestOp)); + sb.append(ROUND); + sb.append(getRunData().getConfig().getColsNamesForValsByRound()); + for (int i = 0; i < COLS.length; i++) { + sb.append(COLS[i]); + } + return sb.toString(); + } + + /** + * find the longest op name out of completed tasks. + * @param taskStats completed tasks to be considered. + * @return the longest op name out of completed tasks. + */ + protected String longestOp(Iterator taskStats) { + String longest = OP; + while (taskStats.hasNext()) { + TaskStats stat = (TaskStats) taskStats.next(); + if (stat.getElapsed()>=0) { // consider only tasks that ended + String name = stat.getTask().getName(); + if (name.length() > longest.length()) { + longest = name; + } + } + } + return longest; + } + + /** + * Compute a report line for the given task stat. + * @param longestOp size of longest op name in the table. + * @param stat task stat to be printed. + * @return the report line. + */ + protected String taskReportLine(String longestOp, TaskStats stat) { + PerfTask task = stat.getTask(); + StringBuffer sb = new StringBuffer(); + sb.append(Format.format(task.getName(), longestOp)); + String round = (stat.getRound()>=0 ? ""+stat.getRound() : "-"); + sb.append(Format.formatPaddLeft(round, ROUND)); + sb.append(getRunData().getConfig().getColsValuesForValsByRound(stat.getRound())); + sb.append(Format.format(stat.getNumRuns(), RUNCNT)); + sb.append(Format.format(stat.getCount() / stat.getNumRuns(), RECCNT)); + long elapsed = (stat.getElapsed()>0 ? stat.getElapsed() : 1); // assume at least 1ms + sb.append(Format.format(1,(float) (stat.getCount() * 1000.0 / elapsed), RECSEC)); + sb.append(Format.format(2, (float) stat.getElapsed() / 1000, ELAPSED)); + sb.append(Format.format(0, (float) stat.getMaxUsedMem() / stat.getNumRuns(), USEDMEM)); + sb.append(Format.format(0, (float) stat.getMaxTotMem() / stat.getNumRuns(), TOTMEM)); + return sb.toString(); + } + + protected Report genPartialReport(int reported, LinkedHashMap partOfTasks, int totalSize) { + String longetOp = longestOp(partOfTasks.values().iterator()); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longetOp)); + sb.append(newline); + int lineNum = 0; + for (Iterator it = partOfTasks.values().iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longetOp,stat); + lineNum++; + if (partOfTasks.size()>2 && lineNum%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); + return new Report(reptxt,partOfTasks.size(),reported,totalSize); + } + + + } Index: src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (working copy) @@ -17,10 +17,6 @@ * limitations under the License. 
*/ -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; @@ -30,20 +26,18 @@ import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + /** * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for * searching in the Reuters collection. */ -public class ReutersQueryMaker implements QueryMaker { - - private int qnum = 0; - private Query queries[]; - private Config config; - +public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker { + private static String [] STANDARD_QUERIES = { //Start with some short queries "Salomon", "Comex", "night trading", "Japan Sony", @@ -106,7 +100,7 @@ return (Query[]) queries.toArray(new Query[0]); } - private void prepareQueries() throws Exception { + protected Query[] prepareQueries() throws Exception { // analyzer (default is standard analyzer) Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance(); @@ -114,47 +108,10 @@ List queryList = new ArrayList(20); queryList.addAll(Arrays.asList(STANDARD_QUERIES)); queryList.addAll(Arrays.asList(getPrebuiltQueries("body"))); - queries = createQueries(queryList, anlzr); + return createQueries(queryList, anlzr); } + + - public Query makeQuery() throws Exception { - return queries[nextQnum()]; - } - - public void setConfig(Config config) throws Exception { - this.config = config; - prepareQueries(); - } - - public void resetInputs() { - qnum = 0; - } - - // return next qnum - private synchronized int nextQnum() { - int res = qnum; - qnum = (qnum+1) % queries.length; - return res; - } - - public String printQueries() { - String newline = System.getProperty("line.separator"); - StringBuffer sb = new StringBuffer(); - if (queries != null) { - for (int i = 0; i < queries.length; i++) { - sb.append(i+". "+Format.simpleName(queries[i].getClass())+" - "+queries[i].toString()); - sb.append(newline); - } - } - return sb.toString(); - } - - /* - * (non-Javadoc) - * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int) - */ - public Query makeQuery(int size) throws Exception { - throw new Exception(this+".makeQuery(int size) is not supported!"); - } } Index: src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (working copy) @@ -17,28 +17,23 @@ * limitations under the License. 
*/ -import java.util.ArrayList; - import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.BooleanClause.Occur; +import java.util.ArrayList; + /** * A QueryMaker that makes queries for a collection created * using {@link org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker}. */ -public class SimpleQueryMaker implements QueryMaker { +public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker { - private int qnum = 0; - private Query queries[]; - private Config config; - + /** * Prepare the queries for this test. * Extending classes can overide this method for preparing different queries. @@ -70,44 +65,4 @@ return (Query []) qq.toArray(new Query[0]); } - public Query makeQuery() throws Exception { - return queries[nextQnum()]; - } - - public void setConfig(Config config) throws Exception { - this.config = config; - queries = prepareQueries(); - } - - public void resetInputs() { - qnum = 0; - } - - // return next qnum - private synchronized int nextQnum() { - int res = qnum; - qnum = (qnum+1) % queries.length; - return res; - } - - public String printQueries() { - String newline = System.getProperty("line.separator"); - StringBuffer sb = new StringBuffer(); - if (queries != null) { - for (int i = 0; i < queries.length; i++) { - sb.append(i+". "+Format.simpleName(queries[i].getClass())+" - "+queries[i].toString()); - sb.append(newline); - } - } - return sb.toString(); - } - - /* - * (non-Javadoc) - * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int) - */ - public Query makeQuery(int size) throws Exception { - throw new Exception(this+".makeQuery(int size) is not supported!"); - } - } Index: src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java (revision 0) @@ -0,0 +1,94 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.search.Query; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +/** + * Copyright 2004 The Apache Software Foundation + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Create queries from a FileReader. One per line, pass them through the + * QueryParser. Lines beginning with # are treated as comments. + * + * The file can be specified as an absolute path, a relative path, or a resource. + * Two properties can be set: + * file.query.maker.file=&lt;Full path to file containing queries&gt; + *
+ * file.query.maker.default.field=<Name of default field - Default value is "body"> + * + * Example: + * file.query.maker.file=c:/myqueries.txt + * file.query.maker.default.field=body + */ +public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMaker{ + + + protected Query[] prepareQueries() throws Exception { + + Analyzer anlzr = (Analyzer) Class.forName(config.get("analyzer", + "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance(); + String defaultField = config.get("file.query.maker.default.field", "body"); + QueryParser qp = new QueryParser(defaultField, anlzr); + + List qq = new ArrayList(); + String fileName = config.get("file.query.maker.file", null); + if (fileName != null) + { + File file = new File(fileName); + Reader reader = null; + if (file != null && file.exists()) + { + reader = new FileReader(file); + } else { + //see if we can find it as a resource + InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName); + if (asStream != null) { + reader = new InputStreamReader(asStream); + } + } + if (reader != null) { + BufferedReader buffered = new BufferedReader(reader); + String line = null; + int lineNum = 0; + while ((line = buffered.readLine()) != null) + { + line = line.trim(); + if (!line.equals("") && !line.startsWith("#")) + { + Query query = null; + try { + query = qp.parse(line); + } catch (ParseException e) { + System.err.println("Exception: " + e.getMessage() + " occurred while parsing line: " + lineNum + " Text: " + line); + } + qq.add(query); + } + lineNum++; + } + } else { + System.err.println("No Reader available for: " + fileName); + } + } + Query [] result = (Query[]) qq.toArray(new Query[qq.size()]) ; + return result; + } +} Property changes on: src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java ___________________________________________________________________ Name: svn:executable + * Name: svn:eol-style + native Index: src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleSloppyPhraseQueryMaker.java (working copy) @@ -36,7 +36,7 @@ q.setSlop(slop); int wind = wd; for (int i=0; i0) { remainedSlop--; wind++; @@ -49,7 +49,7 @@ q.setSlop(slop+2*qlen); wind = wd+qlen+remainedSlop-1; for (int i=0; i0) { remainedSlop--; wind--; Index: src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java (revision 0) @@ -0,0 +1,73 @@ +package org.apache.lucene.benchmark.byTask.feeds; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Query; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.Format; + +/** + * Abstract base query maker. + * Each query maker should just implement the {@link #prepareQueries()} method. + **/ +public abstract class AbstractQueryMaker implements QueryMaker { + + protected int qnum = 0; + protected Query[] queries; + protected Config config; + + public void resetInputs() { + qnum = 0; + } + + protected abstract Query[] prepareQueries() throws Exception; + + public void setConfig(Config config) throws Exception { + this.config = config; + queries = prepareQueries(); + } + + public String printQueries() { + String newline = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(); + if (queries != null) { + for (int i = 0; i < queries.length; i++) { + sb.append(i+". "+ Format.simpleName(queries[i].getClass())+" - "+queries[i].toString()); + sb.append(newline); + } + } + return sb.toString(); + } + + public Query makeQuery() throws Exception { + return queries[nextQnum()]; + } + + // return next qnum + protected synchronized int nextQnum() { + int res = qnum; + qnum = (qnum+1) % queries.length; + return res; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int) + */ + public Query makeQuery(int size) throws Exception { + throw new Exception(this+".makeQuery(int size) is not supported!"); + } +} Property changes on: src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java ___________________________________________________________________ Name: svn:executable + * Name: svn:eol-style + native Index: src/java/org/apache/lucene/benchmark/byTask/Benchmark.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (working copy) @@ -18,6 +18,8 @@ */ import java.io.File; +import java.io.FileReader; +import java.io.Reader; import org.apache.lucene.benchmark.byTask.utils.Algorithm; import org.apache.lucene.benchmark.byTask.utils.Config; @@ -37,11 +39,38 @@ *

  • TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)
  • *
  • TODO - add overall time control for repeated execution (vs. current by-count only).
  • *
  • TODO - query maker that is based on index statistics.
  • - *
  • TODO - prpoerties documentation - each task should document the properties it relies on.
  • * */ public class Benchmark { + private PerfRunData runData; + private Algorithm algorithm; + private boolean executed; + + public Benchmark (Reader algReader) throws Exception { + // prepare run data + try { + runData = new PerfRunData(new Config(algReader)); + } catch (Exception e) { + throw new Exception("Error: cannot init PerfRunData!",e); + } + + // parse algorithm + try { + algorithm = new Algorithm(runData); + } catch (Exception e) { + throw new Exception("Error: cannot understand algorithm!",e); + } + } + + public synchronized void execute() throws Exception { + if (executed) { + throw new Exception("Benchmark was already executed"); + } + executed = true; + algorithm.execute(); + } + /** * Run the benchmark algorithm. * @param args benchmark config and algorithm files @@ -60,32 +89,22 @@ System.exit(1); } - // last preparations - PerfRunData runData = null; - try { - runData = new PerfRunData(new Config(algFile)); - } catch (Exception e) { - System.err.println("Error: cannot init PerfRunData: "+e.getMessage()); - e.printStackTrace(); - System.exit(1); - } + System.out.println("Running algorithm from: "+algFile.getAbsolutePath()); - // parse algorithm - Algorithm algorithm = null; + Benchmark benchmark = null; try { - algorithm = new Algorithm(runData); + benchmark = new Benchmark(new FileReader(algFile)); } catch (Exception e) { - System.err.println("Error: cannot understand algorithm from file: "+algFile.getAbsolutePath()); e.printStackTrace(); System.exit(1); } System.out.println("------------> algorithm:"); - System.out.println(algorithm.toString()); + System.out.println(benchmark.getAlgorithm().toString()); // execute try { - algorithm.execute(); + benchmark.execute(); } catch (Exception e) { System.err.println("Error: cannot execute the algorithm! "+e.getMessage()); e.printStackTrace(); @@ -97,4 +116,18 @@ } + /** + * @return Returns the algorithm. + */ + public Algorithm getAlgorithm() { + return algorithm; + } + + /** + * @return Returns the runData. + */ + public PerfRunData getRunData() { + return runData; + } + } Index: src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (working copy) @@ -18,10 +18,15 @@ */ import java.io.File; +import java.util.HashMap; +import java.util.Iterator; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; import org.apache.lucene.benchmark.byTask.stats.Points; +import org.apache.lucene.benchmark.byTask.tasks.ReadTask; +import org.apache.lucene.benchmark.byTask.tasks.SearchTask; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; @@ -53,9 +58,10 @@ private Directory directory; private Analyzer analyzer; private DocMaker docMaker; - private QueryMaker searchQueryMaker; - private QueryMaker searchTravQueryMaker; - private QueryMaker searchTravRetQueryMaker; + + // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately. 
+ private HashMap readTaskQueryMaker; + private Class qmkrClass; private IndexReader indexReader; private IndexWriter indexWriter; @@ -72,14 +78,9 @@ "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker")).newInstance(); docMaker.setConfig(config); // query makers - // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately. - Class qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")); - searchQueryMaker = (QueryMaker) qmkrClass.newInstance(); - searchQueryMaker.setConfig(config); - searchTravQueryMaker = (QueryMaker) qmkrClass.newInstance(); - searchTravQueryMaker.setConfig(config); - searchTravRetQueryMaker = (QueryMaker) qmkrClass.newInstance(); - searchTravRetQueryMaker.setConfig(config); + readTaskQueryMaker = new HashMap(); + qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")); + // index stuff reinit(false); @@ -88,7 +89,7 @@ if (Boolean.valueOf(config.get("log.queries","false")).booleanValue()) { System.out.println("------------> queries:"); - System.out.println(getSearchQueryMaker().printQueries()); + System.out.println(getQueryMaker(new SearchTask(this)).printQueries()); } } @@ -117,7 +118,7 @@ FileUtils.fullyDelete(indexDir); } indexDir.mkdirs(); - directory = FSDirectory.getDirectory(indexDir, eraseIndex); + directory = FSDirectory.getDirectory(indexDir); } else { directory = new RAMDirectory(); } @@ -202,24 +203,30 @@ public void resetInputs() { docMaker.resetInputs(); - searchQueryMaker.resetInputs(); - searchTravQueryMaker.resetInputs(); - searchTravRetQueryMaker.resetInputs(); + Iterator it = readTaskQueryMaker.values().iterator(); + while (it.hasNext()) { + ((QueryMaker) it.next()).resetInputs(); + } } /** - * @return Returns the searchQueryMaker. + * @return Returns the queryMaker by read task type (class) */ - public QueryMaker getSearchQueryMaker() { - return searchQueryMaker; + public QueryMaker getQueryMaker(ReadTask readTask) { + // mapping the query maker by task class allows extending/adding new search/read tasks + // without needing to modify this class. + Class readTaskClass = readTask.getClass(); + QueryMaker qm = (QueryMaker) readTaskQueryMaker.get(readTaskClass); + if (qm == null) { + try { + qm = (QueryMaker) qmkrClass.newInstance(); + qm.setConfig(config); + } catch (Exception e) { + throw new RuntimeException(e); + } + readTaskQueryMaker.put(readTaskClass,qm); + } + return qm; } - public QueryMaker getSearchTravQueryMaker() { - return searchTravQueryMaker; - } - - public QueryMaker getSearchTravRetQueryMaker() { - return searchTravRetQueryMaker; - } - } Index: src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (working copy) @@ -178,14 +178,14 @@ /* (non-Javadoc) * @see java.lang.Object#clone() */ - protected Object clone() throws CloneNotSupportedException { + public Object clone() throws CloneNotSupportedException { return super.clone(); } /** * @return the round number. 
*/ - int getRound() { + public int getRound() { return round; } Index: src/java/org/apache/lucene/benchmark/byTask/stats/Report.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/Report.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/stats/Report.java (working copy) @@ -27,7 +27,7 @@ private int outOf; private int reported; - Report (String text, int size, int reported, int outOf) { + public Report (String text, int size, int reported, int outOf) { this.text = text; this.size = size; this.reported = reported; Index: src/java/org/apache/lucene/benchmark/byTask/stats/Points.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (working copy) @@ -18,13 +18,10 @@ */ import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.LinkedHashMap; +import java.util.List; import org.apache.lucene.benchmark.byTask.tasks.PerfTask; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; /** @@ -34,8 +31,6 @@ private Config config; - private static final String newline = System.getProperty("line.separator"); - // stat points ordered by their start time. // for now we collect points as TaskStats objects. // later might optimize to collect only native data. @@ -44,269 +39,22 @@ private int nextTaskRunNum = 0; /** - * Get a textual summary of the benchmark results, average from all test runs. - */ - static final String OP = "Operation "; - static final String ROUND = " round"; - static final String RUNCNT = " runCnt"; - static final String RECCNT = " recsPerRun"; - static final String RECSEC = " rec/s"; - static final String ELAPSED = " elapsedSec"; - static final String USEDMEM = " avgUsedMem"; - static final String TOTMEM = " avgTotalMem"; - static final String COLS[] = { - RUNCNT, - RECCNT, - RECSEC, - ELAPSED, - USEDMEM, - TOTMEM - }; - - /** * Create a Points statistics object. */ public Points (Config config) { this.config = config; } - private String tableTitle (String longestOp) { - StringBuffer sb = new StringBuffer(); - sb.append(Format.format(OP,longestOp)); - sb.append(ROUND); - sb.append(config.getColsNamesForValsByRound()); - for (int i = 0; i < COLS.length; i++) { - sb.append(COLS[i]); - } - return sb.toString(); - } - /** - * Report detailed statistics as a string - * @return the report + * Return the current task stats. + * the actual task stats are returned, so caller should not modify this task stats. + * @return current {@link TaskStats}. */ - public Report reportAll() { - String longestOp = longestOp(points); - boolean first = true; - StringBuffer sb = new StringBuffer(); - sb.append(tableTitle(longestOp)); - sb.append(newline); - int reported = 0; - for (Iterator it = points.iterator(); it.hasNext();) { - TaskStats stat = (TaskStats) it.next(); - if (stat.getElapsed()>=0) { // consider only tasks that ended - if (!first) { - sb.append(newline); - } - first = false; - String line = taskReportLine(longestOp, stat); - reported++; - if (points.size()>2&& reported%2==0) { - line = line.replaceAll(" "," - "); - } - sb.append(line); - } - } - String reptxt = (reported==0 ? "No Matching Entries Were Found!" 
: sb.toString()); - return new Report(reptxt,reported,reported,points.size()); + public List taskStats () { + return points; } /** - * Report statistics as a string, aggregate for tasks named the same. - * @return the report - */ - public Report reportSumByName() { - // aggregate by task name - int reported = 0; - LinkedHashMap p2 = new LinkedHashMap(); - for (Iterator it = points.iterator(); it.hasNext();) { - TaskStats stat1 = (TaskStats) it.next(); - if (stat1.getElapsed()>=0) { // consider only tasks that ended - reported++; - String name = stat1.getTask().getName(); - TaskStats stat2 = (TaskStats) p2.get(name); - if (stat2 == null) { - try { - stat2 = (TaskStats) stat1.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - p2.put(name,stat2); - } else { - stat2.add(stat1); - } - } - } - // now generate report from secondary list p2 - return genReportFromList(reported, p2); - } - - /** - * Report statistics as a string, aggregate for tasks named the same, and from the same round. - * @return the report - */ - public Report reportSumByNameRound() { - // aggregate by task name and round - LinkedHashMap p2 = new LinkedHashMap(); - int reported = 0; - for (Iterator it = points.iterator(); it.hasNext();) { - TaskStats stat1 = (TaskStats) it.next(); - if (stat1.getElapsed()>=0) { // consider only tasks that ended - reported++; - String name = stat1.getTask().getName(); - String rname = stat1.getRound()+"."+name; // group by round - TaskStats stat2 = (TaskStats) p2.get(rname); - if (stat2 == null) { - try { - stat2 = (TaskStats) stat1.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - p2.put(rname,stat2); - } else { - stat2.add(stat1); - } - } - } - // now generate report from secondary list p2 - return genReportFromList(reported, p2); - } - - private String longestOp(Collection c) { - String longest = OP; - for (Iterator it = c.iterator(); it.hasNext();) { - TaskStats stat = (TaskStats) it.next(); - if (stat.getElapsed()>=0) { // consider only tasks that ended - String name = stat.getTask().getName(); - if (name.length() > longest.length()) { - longest = name; - } - } - } - return longest; - } - - private String taskReportLine(String longestOp, TaskStats stat) { - PerfTask task = stat.getTask(); - StringBuffer sb = new StringBuffer(); - sb.append(Format.format(task.getName(), longestOp)); - String round = (stat.getRound()>=0 ? ""+stat.getRound() : "-"); - sb.append(Format.formatPaddLeft(round, ROUND)); - sb.append(config.getColsValuesForValsByRound(stat.getRound())); - sb.append(Format.format(stat.getNumRuns(), RUNCNT)); - sb.append(Format.format(stat.getCount() / stat.getNumRuns(), RECCNT)); - long elapsed = (stat.getElapsed()>0 ? 
stat.getElapsed() : 1); // assume at least 1ms - sb.append(Format.format(1,(float) (stat.getCount() * 1000.0 / elapsed), RECSEC)); - sb.append(Format.format(2, (float) stat.getElapsed() / 1000, ELAPSED)); - sb.append(Format.format(0, (float) stat.getMaxUsedMem() / stat.getNumRuns(), USEDMEM)); - sb.append(Format.format(0, (float) stat.getMaxTotMem() / stat.getNumRuns(), TOTMEM)); - return sb.toString(); - } - - public Report reportSumByPrefix(String prefix) { - // aggregate by task name - int reported = 0; - LinkedHashMap p2 = new LinkedHashMap(); - for (Iterator it = points.iterator(); it.hasNext();) { - TaskStats stat1 = (TaskStats) it.next(); - if (stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name - reported++; - String name = stat1.getTask().getName(); - TaskStats stat2 = (TaskStats) p2.get(name); - if (stat2 == null) { - try { - stat2 = (TaskStats) stat1.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - p2.put(name,stat2); - } else { - stat2.add(stat1); - } - } - } - // now generate report from secondary list p2 - return genReportFromList(reported, p2); - } - - public Report reportSumByPrefixRound(String prefix) { - // aggregate by task name and by round - int reported = 0; - LinkedHashMap p2 = new LinkedHashMap(); - for (Iterator it = points.iterator(); it.hasNext();) { - TaskStats stat1 = (TaskStats) it.next(); - if (stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name - reported++; - String name = stat1.getTask().getName(); - String rname = stat1.getRound()+"."+name; // group by round - TaskStats stat2 = (TaskStats) p2.get(rname); - if (stat2 == null) { - try { - stat2 = (TaskStats) stat1.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - p2.put(rname,stat2); - } else { - stat2.add(stat1); - } - } - } - // now generate report from secondary list p2 - return genReportFromList(reported, p2); - } - - private Report genReportFromList(int reported, LinkedHashMap p2) { - String longetOp = longestOp(p2.values()); - boolean first = true; - StringBuffer sb = new StringBuffer(); - sb.append(tableTitle(longetOp)); - sb.append(newline); - int lineNum = 0; - for (Iterator it = p2.values().iterator(); it.hasNext();) { - TaskStats stat = (TaskStats) it.next(); - if (!first) { - sb.append(newline); - } - first = false; - String line = taskReportLine(longetOp,stat); - lineNum++; - if (p2.size()>2&& lineNum%2==0) { - line = line.replaceAll(" "," - "); - } - sb.append(line); - } - String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); - return new Report(reptxt,p2.size(),reported,points.size()); - } - - public Report reportSelectByPrefix(String prefix) { - String longestOp = longestOp(points); - boolean first = true; - StringBuffer sb = new StringBuffer(); - sb.append(tableTitle(longestOp)); - sb.append(newline); - int reported = 0; - for (Iterator it = points.iterator(); it.hasNext();) { - TaskStats stat = (TaskStats) it.next(); - if (stat.getElapsed()>=0 && stat.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name - reported++; - if (!first) { - sb.append(newline); - } - first = false; - String line = taskReportLine(longestOp,stat); - if (points.size()>2&& reported%2==0) { - line = line.replaceAll(" "," - "); - } - sb.append(line); - } - } - String reptxt = (reported==0 ? "No Matching Entries Were Found!" 
: sb.toString()); - return new Report(reptxt,reported,reported, points.size()); - } - - /** * Mark that a task is starting. * Create a task stats for it and store it as a point. * @param task the starting task. Index: src/java/org/apache/lucene/benchmark/byTask/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/package.html (revision 519567) +++ src/java/org/apache/lucene/benchmark/byTask/package.html (working copy) @@ -64,7 +64,8 @@
  • Benchmark "algorithm"
  • Supported tasks/commands
  • Benchmark properties
  • -
  • Example input algorithm and the result benchmark report.
  • +
  • Example input algorithm and the result benchmark + report.
  • @@ -74,9 +75,12 @@

    -A benchmark is composed of some predefined tasks, allowing for creating an index, adding documents, -optimizing, searching, generating reports, and more. A benchmark run takes an "algorithm" file -that contains a description of the sequence of tasks making up the run, and some properties defining a few +A benchmark is composed of some predefined tasks, allowing for creating an +index, adding documents, +optimizing, searching, generating reports, and more. A benchmark run takes an +"algorithm" file +that contains a description of the sequence of tasks making up the run, and some +properties defining a few additional characteristics of the benchmark run.

    @@ -95,28 +99,30 @@
    - would run your perf test "algorithm".
  • java org.apache.lucene.benchmark.byTask.programmatic.Sample -
    - would run a performance test programmatically - without using an alg file. - This is less readable, and less convinient, but possible. +
- would run a performance test programmatically - without using an alg + file. This is less readable, and less convenient, but possible.
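A minimal sketch of that programmatic route, using the Benchmark(Reader) constructor, getAlgorithm() and execute() introduced by this patch; the algorithm file name is a placeholder:

    import java.io.File;
    import java.io.FileReader;

    import org.apache.lucene.benchmark.byTask.Benchmark;

    public class RunAlgExample {
      public static void main(String[] args) throws Exception {
        // "my-test.alg" is hypothetical - point this at any valid algorithm file.
        Benchmark benchmark = new Benchmark(new FileReader(new File("my-test.alg")));
        System.out.println(benchmark.getAlgorithm().toString()); // echo the parsed algorithm
        benchmark.execute(); // a Benchmark instance may be executed only once
      }
    }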
  • -You may find existing tasks sufficient for defining the benchmark you need, -otherwise, you can extend the framework to meet your needs, as explained herein. +You may find existing tasks sufficient for defining the benchmark you +need, otherwise, you can extend the framework to meet your needs, as explained +herein.

-Each benchmark run has a DocMaker and a QueryMaker. These two should usually match, so -that "meaningful" queries are used for a certain collection. -Properties set at the header of the alg file define which "makers" should be used. -You can also specify your own makers, implementing the DocMaker and QureyMaker interfaces. +Each benchmark run has a DocMaker and a QueryMaker. These two should usually +match, so that "meaningful" queries are used for a certain collection. +Properties set at the header of the alg file define which "makers" should be +used. You can also specify your own makers, implementing the DocMaker and +QueryMaker interfaces.
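With the AbstractQueryMaker introduced by this patch, a custom query maker only has to supply prepareQueries(); query cycling, setConfig() and printQueries() are inherited. A sketch with illustrative names - the class and the query strings are not part of the patch:

    package org.apache.lucene.benchmark.byTask.feeds;

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Query;

    public class MyQueryMaker extends AbstractQueryMaker implements QueryMaker {

      protected Query[] prepareQueries() throws Exception {
        // same analyzer lookup convention the other makers use
        Analyzer anlzr = (Analyzer) Class.forName(config.get("analyzer",
            "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
        QueryParser qp = new QueryParser("body", anlzr);
        String queryTexts[] = { "benchmark", "index merge", "\"query parser\"" };
        List qq = new ArrayList();
        for (int i = 0; i < queryTexts.length; i++) {
          qq.add(qp.parse(queryTexts[i]));
        }
        return (Query[]) qq.toArray(new Query[0]);
      }
    }

Pointing the query.maker property at such a class then lets PerfRunData hand each read task type its own instance of it.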

    -Benchmark .alg file contains the benchmark "algorithm". The syntax is described below. -Within the algorithm, you can specify groups of commands, assign them names, -specify commands that should be repeated, +Benchmark .alg file contains the benchmark "algorithm". The syntax is described +below. Within the algorithm, you can specify groups of commands, assign them +names, specify commands that should be repeated, do commands in serial or in parallel, and also control the speed of "firing" the commands.

    @@ -151,8 +157,10 @@
    1. - Measuring: When a command is executed, statistics for the elapsed execution time and memory consumption are collected. - At any time, those statistics can be printed, using one of the available ReportTasks. + Measuring: When a command is executed, statistics for the elapsed + execution time and memory consumption are collected. + At any time, those statistics can be printed, using one of the + available ReportTasks.
    2. Comments start with '#'. @@ -161,58 +169,98 @@ Serial sequences are enclosed within '{ }'.
    3. - Parallel sequences are enclosed within '[ ]' + Parallel sequences are enclosed within + '[ ]'
    4. - Sequence naming: To name a sequence, put '"name"' just after '{' or '['. -
      Example - { "ManyAdds" AddDoc } : 1000000 - would - name the sequence of 1M add docs "ManyAdds", and this name would later appear in statistic reports. - If you don't specify a name for a sequence, it is given one: you can see it as the - algorithm is printed just before benchmark execution starts. + Sequence naming: To name a sequence, put + '"name"' just after + '{' or '['. +
      Example - { "ManyAdds" AddDoc } : 1000000 - + would + name the sequence of 1M add docs "ManyAdds", and this name would later appear + in statistic reports. + If you don't specify a name for a sequence, it is given one: you can see it as + the algorithm is printed just before benchmark execution starts.
    5. Repeating: - To repeat sequence tasks N times, add ': N' just after the - sequence closing tag - '}' or ']' or '>'. -
      Example - [ AddDoc ] : 4 - would do 4 addDoc in parallel, spawning 4 threads at once. -
      Example - [ AddDoc AddDoc ] : 4 - would do 8 addDoc in parallel, spawning 8 threads at once. -
      Example - { AddDoc } : 30 - would do addDoc 30 times in a row. -
      Example - { AddDoc AddDoc } : 30 - would do addDoc 60 times in a row. + To repeat sequence tasks N times, add ': N' just + after the + sequence closing tag - '}' or + ']' or '>'. +
      Example - [ AddDoc ] : 4 - would do 4 addDoc + in parallel, spawning 4 threads at once. +
      Example - [ AddDoc AddDoc ] : 4 - would do + 8 addDoc in parallel, spawning 8 threads at once. +
      Example - { AddDoc } : 30 - would do addDoc + 30 times in a row. +
      Example - { AddDoc AddDoc } : 30 - would do + addDoc 60 times in a row.
6. Command parameter: a command can take a single parameter. - If the certain command does not support a parameter, or if the parameter is of the wrong type, + If a certain command does not support a parameter, or if the parameter is of + the wrong type, reading the algorithm will fail with an exception and the test would not start. - Currently only AddDoc supports a (numeric) parameter, which indicates the required size of added document. - If the DocMaker implementation used in the test does not support makeDoc(size), an exception would be thrown and the test would fail. -
      Example - AddDoc(2000) - would add a document of size 2000 (~bytes). -
      See conf/task-sample.alg for how this can be used, for instance, to check which is faster, adding + Currently the following tasks take parameters: +
        +
      • AddDoc takes a numeric parameter, indicating the required size of + added document. Note: if the DocMaker implementation used in the test + does not support makeDoc(size), an exception would be thrown and the test + would fail. +
      • +
• DeleteDoc takes a numeric parameter, indicating the docid to be + deleted. The latter is not very useful for loops, since the docid is + fixed, so for deletion in loops it is better to use the + doc.delete.step property. +
      • +
      • SetProp takes a "name,value" param, ',' used as a separator. +
      • +
      • SearchTravRetTask and SearchTravTask take a numeric + parameter, indicating the required traversal size. +
      • +
      +
      Example - AddDoc(2000) - would add a document + of size 2000 (~bytes). +
See conf/task-sample.alg for how this can be used, for instance, to check + which is faster, adding many smaller documents, or few larger documents. - Next candidates for supporting a parameter may be the Search tasks, for controlling the qurey size. + Next candidates for supporting a parameter may be the Search tasks, + for controlling the query size.
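A rough sketch of the new traversal-size parameter in use; the counts and sizes below are arbitrary, and the algorithm is fed from a string only to keep the example self-contained:

    import java.io.StringReader;

    import org.apache.lucene.benchmark.byTask.Benchmark;

    public class ParamAlgExample {
      public static void main(String[] args) throws Exception {
        String alg =
            "directory=RAMDirectory\n" +
            "CreateIndex\n" +
            "{ AddDoc(2000) } : 100\n" +      // documents of ~2000 bytes each
            "CloseIndex\n" +
            "OpenReader\n" +
            "{ SearchTrav(500) } : 10\n" +    // traverse at most 500 hits per search
            "{ SearchTravRet(50) } : 10\n" +  // traverse and retrieve at most 50 hits
            "CloseReader\n";
        new Benchmark(new StringReader(alg)).execute();
      }
    }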
    7. - Statistic recording elimination: - a sequence can also end with '>', + Statistic recording elimination: - a sequence can also end with + '>', in which case child tasks would not store their statistics. This can be useful to avoid exploding stats data, for adding say 1M docs.
      Example - { "ManyAdds" AddDoc > : 1000000 - would add million docs, measure that total, but not save stats for each addDoc. -
      Notice that the granularity of System.currentTimeMillis() (which is used here) is system dependant, - and in some systems an operation that takes 5 ms to complete may show 0 ms latency time in performance measurements. - Therefore it is sometimes more accurate to look at the elapsed time of a larger sequence, as demonstrated here. +
Notice that the granularity of System.currentTimeMillis() (which is used + here) is system dependent, + and in some systems an operation that takes 5 ms to complete may show 0 ms + latency time in performance measurements. + Therefore it is sometimes more accurate to look at the elapsed time of a larger + sequence, as demonstrated here.
    8. Rate: - To set a rate (ops/sec or ops/min) for a sequence, add ': N : R' just after sequence closing tag. + To set a rate (ops/sec or ops/min) for a sequence, add + ': N : R' just after sequence closing tag. This would specify repetition of N with rate of R operations/sec. - Use 'R/sec' or 'R/min' + Use 'R/sec' or + 'R/min' to explicitely specify that the rate is per second or per minute. The default is per second, -
      Example - [ AddDoc ] : 400 : 3 - would do 400 addDoc in parallel, starting up to 3 threads per second. -
      Example - { AddDoc } : 100 : 200/min - would do 100 addDoc serially, +
      Example - [ AddDoc ] : 400 : 3 - would do 400 + addDoc in parallel, starting up to 3 threads per second. +
      Example - { AddDoc } : 100 : 200/min - would + do 100 addDoc serially, waiting before starting next add, if otherwise rate would exceed 200 adds/min.
    9. - Command names: Each class "AnyNameTask" in the package org.apache.lucene.benchmark.byTask.tasks, + Command names: Each class "AnyNameTask" in the + package org.apache.lucene.benchmark.byTask.tasks, that extends PerfTask, is supported as command "AnyName" that can be used in the benchmark "algorithm" description. This allows to add new commands by just adding such classes. @@ -239,59 +287,85 @@ RepAll - all (completed) task runs.
    10. - RepSumByName - all statistics, aggregated by name. So, if AddDoc was executed 2000 times, - only 1 report line would be created for it, aggregating all those 2000 statistic records. + RepSumByName - all statistics, + aggregated by name. So, if AddDoc was executed 2000 times, + only 1 report line would be created for it, aggregating all those + 2000 statistic records.
    11. - RepSelectByPref   prefixWord - all records for tasks whose name start with prefixWord. + RepSelectByPref   prefixWord - all + records for tasks whose name start with + prefixWord.
    12. - RepSumByPref   prefixWord - all records for tasks whose name start with prefixWord, + RepSumByPref   prefixWord - all + records for tasks whose name start with + prefixWord, aggregated by their full task name.
    13. - RepSumByNameRound - all statistics, aggregated by name and by Round. - So, if AddDoc was executed 2000 times in each of 3 rounds, 3 report lines would be created for it, - aggregating all those 2000 statistic records in each round. See more about rounds in the NewRound command description below. + RepSumByNameRound - all statistics, + aggregated by name and by Round. + So, if AddDoc was executed 2000 times in each of 3 + rounds, 3 report lines would be + created for it, + aggregating all those 2000 statistic records in each round. + See more about rounds in the NewRound + command description below.
    14. - RepSumByPrefRound   prefixWord - similar to RepSumByNameRound, - just that only tasks whose name starts with prefixWord are included. + RepSumByPrefRound   prefixWord - + similar to RepSumByNameRound, + just that only tasks whose name starts with + prefixWord are included.
    15. - If needed, additional reports can be added by extending the abstract class ReportTask, and by + If needed, additional reports can be added by extending the abstract class + ReportTask, and by manipulating the statistics data in Points and TaskStats. -
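Since this patch makes Points.taskStats(), TaskStats.getRound() and the Report constructor public, a custom report can be assembled directly from the collected stats. The sketch below is not one of the shipped report tasks, only an illustration of the now-public API; the class and method names are made up:

    import java.util.Iterator;
    import java.util.List;

    import org.apache.lucene.benchmark.byTask.stats.Points;
    import org.apache.lucene.benchmark.byTask.stats.Report;
    import org.apache.lucene.benchmark.byTask.stats.TaskStats;

    public class ElapsedReportExample {
      /** One line per completed task run: name, round and elapsed time. */
      public static Report elapsedReport(Points points) {
        String newline = System.getProperty("line.separator");
        List stats = points.taskStats();
        StringBuffer sb = new StringBuffer();
        int reported = 0;
        for (Iterator it = stats.iterator(); it.hasNext();) {
          TaskStats stat = (TaskStats) it.next();
          if (stat.getElapsed() >= 0) { // consider only tasks that ended
            reported++;
            sb.append(stat.getTask().getName()).append(' ')
              .append("round=").append(stat.getRound()).append(' ')
              .append("elapsedMs=").append(stat.getElapsed()).append(newline);
          }
        }
        return new Report(sb.toString(), reported, reported, stats.size());
      }
    }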
    16. Control tasks: Few of the tasks control the benchmark algorithm all over: +
    17. Control tasks: Few of the tasks control the benchmark algorithm + all over:
      • ClearStats - clears the entire statistics. - Further reports would only include task runs that would start after this call. + Further reports would only include task runs that would start after this + call.
      • - NewRound - virtually start a new round of performance test. - Although this command can be placed anywhere, it mostly makes sense at the end of an outermost sequence. -
        This increments a global "round counter". All task runs that would start now would - record the new, updated round counter as their round number. This would appear in reports. + NewRound - virtually start a new round of + performance test. + Although this command can be placed anywhere, it mostly makes sense at + the end of an outermost sequence. +
        This increments a global "round counter". All task runs that + would start now would + record the new, updated round counter as their round number. + This would appear in reports. In particular, see RepSumByNameRound above. -
        An additional effect of NewRound, is that numeric and boolean properties defined (at the head - of the .alg file) as a sequence of values, e.g. merge.factor=mrg:10:100:10:100 would +
        An additional effect of NewRound, is that numeric and boolean + properties defined (at the head + of the .alg file) as a sequence of values, e.g. + merge.factor=mrg:10:100:10:100 would increment (cyclic) to the next value. - Note: this would also be reflected in the reports, in this case under a column that would be named "mrg". + Note: this would also be reflected in the reports, in this case under a + column that would be named "mrg".
• - ResetInputs - DocMaker and the various QueryMakers + ResetInputs - DocMaker and the + various QueryMakers would reset their counters to start. The way these Maker interfaces work, each call for makeDocument() or makeQuery() creates the next document or query that it "knows" to create. - If that pool is "exhausted", the "maker" start over again. The resetInpus command + If that pool is "exhausted", the "maker" starts over again. + The resetInputs command therefore allows to make the rounds comparable. It is therefore useful to invoke ResetInputs together with NewRound.
      • - ResetSystemErase - reset all index and input data and call gc. + ResetSystemErase - reset all index + and input data and call gc. Does NOT reset statistics. This contains ResetInputs. All writers/readers are nullified, deleted, closed. Index is erased. @@ -299,34 +373,48 @@ You would have to call CreateIndex once this was called...
• - ResetSystemSoft - reset all index and input data and call gc. + ResetSystemSoft - reset all + index and input data and call gc. Does NOT reset statistics. This contains ResetInputs. All writers/readers are nullified, closed. Index is NOT erased. Directory is NOT erased. - This is useful for testing performance on an existing index, for instance if the construction of a large index - took a very long time and now you would to test its search or update performance. + This is useful for testing performance on an existing index, + for instance if the construction of a large index + took a very long time and now you would like to test + its search or update performance.
    18. - Other existing tasks are quite straightforward and would just be briefly described here. + Other existing tasks are quite straightforward and would + just be briefly described here.
      • - CreateIndex and OpenIndex both leave the index open for later update operations. + CreateIndex and + OpenIndex both leave the + index open for later update operations. CloseIndex would close it.
      • - OpenReader, similarly, would leave an index reader open for later search operations. + OpenReader, similarly, would + leave an index reader open for later search operations. But this have further semantics. - If a Read operation is performed, and an open reader exists, it would be used. - Otherwise, the read operation would open its own reader and close it when the read operation is done. - This allows testing various scenarios - sharing a reader, searching with "cold" reader, with "warmed" reader, etc. - The read operations affected by this are: Warm, - Search, SearchTrav (search and traverse), - and SearchTravRet (search and traverse and retrieve). - Notice that each of the 3 search task types maintains its own queryMaker instance. + If a Read operation is performed, and an open reader exists, + it would be used. + Otherwise, the read operation would open its own reader + and close it when the read operation is done. + This allows testing various scenarios - sharing a reader, + searching with "cold" reader, with "warmed" reader, etc. + The read operations affected by this are: + Warm, + Search, + SearchTrav (search and traverse), + and SearchTravRet (search + and traverse and retrieve). + Notice that each of the 3 search task types maintains + its own queryMaker instance.
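Under the new PerfRunData.getQueryMaker(ReadTask) mapping, each read-task class gets its own QueryMaker instance, created lazily on first use and reused afterwards. A rough illustration; SearchTask is used here only because it is a concrete ReadTask visible in this patch:

    import org.apache.lucene.benchmark.byTask.PerfRunData;
    import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
    import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
    import org.apache.lucene.search.Query;

    public class QueryMakerLookupExample {
      public static Query nextSearchQuery(PerfRunData runData) throws Exception {
        // one maker per read-task class, shared across runs of that task type
        QueryMaker qm = runData.getQueryMaker(new SearchTask(runData));
        return qm.makeQuery();
      }
    }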
      @@ -341,8 +429,10 @@ As mentioned above for the NewRound task, numeric and boolean properties that are defined as a sequence of values, e.g. merge.factor=mrg:10:100:10:100 -would increment (cyclic) to the next value, when NewRound is called, and would also -appear as a named column in the reports (column name would be "mrg" in this example). +would increment (cyclic) to the next value, +when NewRound is called, and would also +appear as a named column in the reports (column +name would be "mrg" in this example).

      @@ -351,12 +441,13 @@

      1. - analyzer - full class name for the analyzer to use. + analyzer - full + class name for the analyzer to use. Same analyzer would be used in the entire test.
2. - directory - valid values are FSDirectory and RAMDirectory. + directory - valid values are + FSDirectory and RAMDirectory. This tells which directory to use for the performance test.
      3. @@ -384,9 +475,50 @@

      -For additional defined properties see the *.alg files under conf. +Here is a list of currently defined properties:

      +
        +
      1. Docs and queries creation:
      2. +
        • analyzer +
        • doc.maker +
        • doc.stored +
        • doc.tokenized +
        • doc.term.vector +
        • docs.dir +
        • query.maker +
        • file.query.maker.file +
        • file.query.maker.default.field +
        + + +
      3. Logging: +
        • doc.add.log.step +
        • doc.delete.log.step +
        • log.queries +
        • task.max.depth.log +
        +
      4. + +
      5. Index writing: +
        • compound +
        • merge.factor +
        • max.buffered +
        • directory +
        +
      6. + +
      7. Doc deletion: +
        • doc.delete.step +
        +
      8. + +
      + +

      +For sample use of these properties see the *.alg files under conf. +
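As one concrete (hypothetical) combination of the query-related properties above, an alg header could select the new FileBasedQueryMaker like this; the file path is a placeholder:

    query.maker=org.apache.lucene.benchmark.byTask.feeds.FileBasedQueryMaker
    file.query.maker.file=/path/to/queries.txt
    file.query.maker.default.field=body

The referenced file would hold one query per line, with '#' starting a comment, for example:

    # warm-up queries
    Salomon
    "night trading"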

      +

      Example input algorithm and the result benchmark report