Index: build.xml =================================================================== --- build.xml (revision 475655) +++ build.xml (working copy) @@ -115,5 +115,20 @@ + + Working Directory: ${working.dir} + + + + + + + Working Directory: ${working.dir} + + + + + + Index: conf/task-micro-standard.alg =================================================================== --- conf/task-micro-standard.alg (revision 0) +++ conf/task-micro-standard.alg (revision 0) @@ -0,0 +1,29 @@ +{ "Rounds" + + ResetSystemErase + + { "Populate" + CreateIndex + { "MAddDocs" AddDoc > : 2000 + Optimize + CloseIndex + } + + OpenReader + { "SearchSameRdr" Search > : 5000 + CloseReader + + { "WarmNewRdr" Warm > : 50 + + { "SrchNewRdr" Search > : 500 + + { "SrchTrvNewRdr" SearchTrav > : 300 + + { "SrchTrvRetNewRdr" SearchTravRet > : 100 + + NewRound + +} : 4 + +RepSumByName +RepSumByPrefRound MAddDocs Index: conf/task-micro-standard.properties =================================================================== --- conf/task-micro-standard.properties (revision 0) +++ conf/task-micro-standard.properties (revision 0) @@ -0,0 +1,35 @@ +# analyzer +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer + +# dir +directory=FSDirectory +#directory=RamDirectory + +# index params +# multi int/boolean values would be iterated with calls to NewRound. +# they would be also added column in the reports, first string in the +# sequence is the column name. 
(Make sure it is no shorther than any value in te sequence) +max.buffered=buf.10.10.100.100 +merge.factor=mrg.10.100.10.100 +compound=true + +# properties for adding documents +stored=true +tokenized=true +term.vector=false +doc.add.log.step=500 + +# docs text files location in work dir +docs.dir=reuters-out +#docs.dir=reuters-111 + +# doc maker utility +#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker +doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker + +# query maker utility +#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker +query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker + +# task at this depth or less would print when they start +task.max.depth.log=0 Index: conf/task-sample.alg =================================================================== --- conf/task-sample.alg (revision 0) +++ conf/task-sample.alg (revision 0) @@ -0,0 +1,35 @@ +# -------------------------------------------------------- +# +# Sample: what is the effect of doc size on indexing time? +# +# There are two parts in this test: +# - PopulateShort adds 2N documents of length L +# - PopulateLong adds N documents of length 2L +# Which one would be faster? +# The comparison is done twice. 
+# +# -------------------------------------------------------- + +{ + + { "PopulateShort" + CreateIndex + { AddDoc(4000) > : 20000 + Optimize + CloseIndex + > + + ResetSystemErase + + { "PopulateLong" + CreateIndex + { AddDoc(8000) > : 10000 + Optimize + CloseIndex + > + + ResetSystemErase + +} : 2 + +RepSelectByPref Populate Index: conf/task-sample.properties =================================================================== --- conf/task-sample.properties (revision 0) +++ conf/task-sample.properties (revision 0) @@ -0,0 +1,35 @@ +# analyzer +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer + +# dir +directory=FSDirectory +#directory=RamDirectory + +# index params +# multi int/boolean values would be iterated with calls to NewRound. +# they would be also added column in the reports, first string in the +# sequence is the column name. (Make sure it is no shorther than any value in te sequence) +merge.factor=mrg.10.100.10.100.10.100.10.100 +max.buffered=buf.10.10.100.100.10.10.100.100 +compound=cmpnd.true.true.true.true.false.false.false.false + +# properties for adding documents +stored=true +tokenized=true +term.vector=false +doc.add.log.step=2000 + +# docs text files location in work dir +docs.dir=reuters-out +#docs.dir=reuters-111 + +# doc maker utility +doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker +#doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker + +# query maker utility +query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker +#query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker + +# task at this depth or less would print when they start +task.max.depth.log=2 Index: conf/task-standard.alg =================================================================== --- conf/task-standard.alg (revision 0) +++ conf/task-standard.alg (revision 0) @@ -0,0 +1,44 @@ +{ "Rounds" + + ResetSystemErase + + { "Populate" + CreateIndex + { "MAddDocs" AddDoc } : 20000 + Optimize + CloseIndex + } + 
+ OpenReader + { "SearchSameRdr" Search > : 5000 + CloseReader + + { "WarmNewRdr" Warm > : 50 + + { "SrchNewRdr" Search > : 500 + + { "SrchTrvNewRdr" SearchTrav > : 300 + + { "SrchTrvRetNewRdr" SearchTravRet > : 100 + + OpenReader + [ "SearchSameRdr" Search > : 5000 : 500 + CloseReader + + [ "WarmNewRdr" Warm > : 50 : 20 + + [ "SrchNewRdr" Search > : 50 : 20 + + [ "SrchTrvNewRdr" SearchTrav > : 300 : 20 + + [ "SrchTrvRetNewRdr" SearchTravRet > : 100 : 20 + + RepSumByPref MAddDocs + + NewRound + +} : 8 + +RepSumByNameRound +RepSumByName +RepSumByPrefRound MAddDocs Index: conf/task-standard.properties =================================================================== --- conf/task-standard.properties (revision 0) +++ conf/task-standard.properties (revision 0) @@ -0,0 +1,35 @@ +# analyzer +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer + +# dir +directory=FSDirectory +#directory=RamDirectory + +# index params +# multi int/boolean values would be iterated with calls to NewRound. +# they would be also added column in the reports, first string in the +# sequence is the column name. 
(Make sure it is no shorther than any value in te sequence) +merge.factor=mrg.10.100.10.100.10.100.10.100 +max.buffered=buf.10.10.100.100.10.10.100.100 +compound=cmpnd.true.true.true.true.false.false.false.false + +# properties for adding documents +stored=true +tokenized=true +term.vector=false +doc.add.log.step=2000 + +# docs text files location in work dir +docs.dir=reuters-out +#docs.dir=reuters-111 + +# doc maker utility +#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker +doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker + +# query maker utility +#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker +query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker + +# task at this depth or less would print when they start +task.max.depth.log=0 Index: src/java/org/apache/lucene/benchmark/byTask/Benchmark.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (revision 0) @@ -0,0 +1,120 @@ +package org.apache.lucene.benchmark.byTask; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.File; +import java.io.FileInputStream; +import java.util.Iterator; +import java.util.Properties; + +import org.apache.lucene.benchmark.byTask.utils.Algorithm; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Run the benchmark algorithm. + *

Usage: java Benchmark properties-file algorithm-file + *

    + *
  1. Read config file. + *
  2. Read algorithm. + *
  3. Run the algorithm. + *
+ */ +public class Benchmark { + + /** + * Run the benchmark algorithm. + * @param args benchmark config and algorithm files + */ + public static void main(String[] args) { + // verify command line args + if (args.length < 2) { + System.err.println("Usage: java Benchmark "); + System.exit(1); + } + + // verify input files + File propFile = new File(args[0]); + File algFile = new File(args[1]); + if (!propFile.exists() || !propFile.isFile() || !propFile.canRead()) { + System.err.println("Error: cannot find/read config file: "+propFile.getAbsolutePath()); + System.exit(1); + } + if (!algFile.exists() || !algFile.isFile() || !algFile.canRead()) { + System.err.println("cannot find/read algorithm file: "+algFile.getAbsolutePath()); + System.exit(1); + } + + // read propetries + Properties props = new Properties(); + try { + props.load(new FileInputStream(propFile)); + } catch (Exception e) { + System.err.println("Error: cannot load configuration from file: "+propFile.getAbsolutePath()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("------------> config properties:"); + for (Iterator it = props.keySet().iterator(); it.hasNext();) { + String propName = (String) it.next(); + System.out.println(propName + " = " + props.getProperty(propName)); + } + System.out.println(); + + // last preparations + PerfRunData runData = null; + try { + runData = new PerfRunData(new Config(props)); + } catch (Exception e) { + System.err.println("Error: cannot init PerfRunData: "+e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("------------> queries:"); + System.out.println(runData.getSearchQueryMaker().printQueries()); + + // parse algorithm + Algorithm algorithm = null; + try { + algorithm = new Algorithm(algFile, runData); + } catch (Exception e) { + System.err.println("Error: cannot understand algorithm from file: "+algFile.getAbsolutePath()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("------------> 
algorithm:"); + System.out.println(algorithm.toString()); + + // execute + try { + algorithm.execute(); + } catch (Exception e) { + System.err.println("Error: cannot execute the algorithm! "+e.getMessage()); + e.printStackTrace(); + } + + System.out.println("####################"); + System.out.println("### D O N E !!! ###"); + System.out.println("####################"); + + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 0) @@ -0,0 +1,219 @@ +package org.apache.lucene.benchmark.byTask; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.File; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.benchmark.byTask.feeds.DocMaker; +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; +import org.apache.lucene.benchmark.byTask.stats.Points; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.FileUtils; + + +/** + * Data maintained by a performance test run. + *

+ * Data includes: + *

    + *
  • Configuration. + *
  • Directory, Writer, Reader. + *
  • Docmaker and a few instances of QueryMaker. + *
  • Analyzer. + *
  • Statistics data which updated during the run. + *
+ */ +public class PerfRunData { + + private Points points; + + // objects used during performance test run + // directory, analyzer, docMaker - created at startup. + // reader, writer, searcher - maintained by basic tasks. + private Directory directory; + private Analyzer analyzer; + private DocMaker docMaker; + private QueryMaker searchQueryMaker; + private QueryMaker searchTravQueryMaker; + private QueryMaker searchTravRetQueryMaker; + + private IndexReader indexReader; + private IndexWriter indexWriter; + private Config config; + + // constructor + public PerfRunData (Config config) throws Exception { + this.config = config; + // analyzer (default is standard analyzer) + analyzer = (Analyzer) Class.forName(config.get("analyzer", + "org.apache.lucene.analysis.StandardAnalyzer")).newInstance(); + // doc maker + docMaker = (DocMaker) Class.forName(config.get("doc.maker", + "org.apache.lucene.benchmark.byTask.utils.SimpleDocMaker")).newInstance(); + docMaker.setConfig(config); + // query makers + // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately. 
+ Class qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.utils.SimpleQueryMaker")); + searchQueryMaker = (QueryMaker) qmkrClass.newInstance(); + searchQueryMaker.setConfig(config); + searchTravQueryMaker = (QueryMaker) qmkrClass.newInstance(); + searchTravQueryMaker.setConfig(config); + searchTravRetQueryMaker = (QueryMaker) qmkrClass.newInstance(); + searchTravRetQueryMaker.setConfig(config); + // index stuff + reinit(false); + + // statistic points + points = new Points(config); + } + + // clean old stuff, reopen + public void reinit(boolean eraseIndex) throws Exception { + + // cleanup index + if (indexWriter!=null) { + indexWriter.close(); + indexWriter = null; + } + if (indexReader!=null) { + indexReader.close(); + indexReader = null; + } + if (directory!=null) { + directory.close(); + } + + // directory (default is ram-dir). + if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) { + File workDir = new File("work"); + File indexDir = new File(workDir,"index"); + if (eraseIndex && indexDir.exists()) { + FileUtils.fullyDelete(indexDir); + } + indexDir.mkdirs(); + directory = FSDirectory.getDirectory(indexDir, eraseIndex); + } else { + directory = new RAMDirectory(); + } + + // inputs + resetInputs(); + + // release unused stuff + System.runFinalization(); + System.gc(); + } + + /** + * @return Returns the points. + */ + public Points getPoints() { + return points; + } + + /** + * @return Returns the directory. + */ + public Directory getDirectory() { + return directory; + } + + /** + * @param directory The directory to set. + */ + public void setDirectory(Directory directory) { + this.directory = directory; + } + + /** + * @return Returns the indexReader. + */ + public IndexReader getIndexReader() { + return indexReader; + } + + /** + * @param indexReader The indexReader to set. 
+ */ + public void setIndexReader(IndexReader indexReader) { + this.indexReader = indexReader; + } + + /** + * @return Returns the indexWriter. + */ + public IndexWriter getIndexWriter() { + return indexWriter; + } + + /** + * @param indexWriter The indexWriter to set. + */ + public void setIndexWriter(IndexWriter indexWriter) { + this.indexWriter = indexWriter; + } + + /** + * @return Returns the anlyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the docMaker. + */ + public DocMaker getDocMaker() { + return docMaker; + } + + /** + * @return Returns the config. + */ + public Config getConfig() { + return config; + } + + public void resetInputs() { + docMaker.resetInputs(); + searchQueryMaker.resetInputs(); + searchTravQueryMaker.resetInputs(); + searchTravRetQueryMaker.resetInputs(); + } + + /** + * @return Returns the searchQueryMaker. + */ + public QueryMaker getSearchQueryMaker() { + return searchQueryMaker; + } + + public QueryMaker getSearchTravQueryMaker() { + return searchTravQueryMaker; + } + + public QueryMaker getSearchTravRetQueryMaker() { + return searchTravRetQueryMaker; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/package.html (revision 0) @@ -0,0 +1,432 @@ + + + + Benchmarking Lucene By Tasks + + +
+Benchmarking Lucene By Tasks. +

+This package provides "task based" performance benchmarking of Lucene. +One can use the predefined benchmarks, or create new ones. +

+

+Contained packages: +

+ + + + + + + + + + + + + + + + + + + + + + +
PackageDescription
statsStatistics maintained when running benchmark tasks.
tasksBenchmark tasks.
feedsSources for benchmark inputs: documents and queries.
utilsUtilities used for the benchmark, and for the reports.
+ +

Table Of Contents

+

+

    +
  1. Benchmarking By Tasks
  2. +
  3. How to use
  4. +
  5. Benchmark "algorithm"
  6. +
  7. Supported tasks/commands
  8. +
  9. Benchmark properties
  10. +
  11. Example input algorithm and the result benchmark report.
  12. +
+

+ +

Benchmarking By Tasks

+

+Benchmark Lucene using task primitives. +

+ +

+A benchmark is composed of some predefined tasks, allowing for creating an index, adding documents, +optimizing, searching, generating reports, and more. A benchmark run takes an "algorithm" file +that describes the sequence of tasks making up the run, and a properties file defining a few +additional characteristics of the benchmark run. +

+ + +

How to use

+

+Predefined benchmarks are run using the predefined ant targets: +

    +
  • ant run-task-standard +
  • ant run-task-micro-standard +
+ +

+You can create your own benchmark by modifying one of the predefined .alg and .properties +files and using the appropriate ant target, or by providing your own .alg and .properties files. +In this case, you should run the class org.apache.lucene.benchmark.byTask.Benchmark and provide +the two arguments: file.properties file.alg. +

+

+It is very likely that this would be sufficient for defining the benchmark you need, +otherwise, you can extend the framework to meet your needs, as explained herein. +

+ +

+Each benchmark run has a DocMaker and a QueryMaker. These two should usually match, so +that "meaningful" queries are used for a certain collection. You can modify +the properties file to define which "makers" should be used. You can also +specify your own, by implementing the DocMaker and QueryMaker interfaces. +

+ +

+Benchmark .alg file contains the benchmark "algorithm". The syntax is described below. +Within the algorithm, you can specify groups of commands, assign them names, specify commands that should be repeated, +do commands in serial or in parallel, and also control the speed of "firing" the commands. +

+

+This allows you, for instance, to specify +that an index should be opened for update, +documents should be added to it one by one but not faster than 20 docs a minute, +and, in parallel with this, +some N queries should be searched against that index, +again, no more than 2 queries a second. +You can have the searches all share an index searcher, +or have them each open its own searcher and close it afterwards. +

+ +

+If the commands available for use in the algorithm do not meet your needs, +you can add commands by adding a new task under +org.apache.lucene.benchmark.byTask.tasks - +you should extend the PerfTask abstract class. +Make sure that your new task class name is suffixed by Task. +Assume you added the class "WonderfulTask" - doing so also enables the +command "Wonderful" to be used in the algorithm. +

+ + +

Benchmark "algorithm"

+ +

+The following is an informal description of the supported syntax. +

+ +
    +
  1. + Measuring: When a command is executed, statistics for the elapsed execution time and memory consumption are collected. + At any time, those statistics can be printed, using one of the available ReportTasks. +
  2. +
  3. + Comments start with '#'. +
  4. +
  5. + Serial sequences are enclosed within '{ }'. +
  6. +
  7. + Parallel sequences are enclosed within '[ ]' +
  8. +
  9. + Sequence naming: To name a sequence, put '"name"' just after '{' or '['. +
    Example - { "ManyAdds" AddDoc } : 1000000 - would + name the sequence of 1M add docs "ManyAdds", and this name would later appear in statistic reports. + If you don't specify a name for a sequence, it is given one: you can see it as the + algorithm is printed just before benchmark execution starts. +
  10. +
  11. + Repeating: + To repeat sequence tasks N times, add ': N' just after the + sequence closing tag - '}' or ']' or '>'. +
    Example - [ AddDoc ] : 4 - would do 4 addDoc in parallel, spawning 4 threads at once. +
    Example - [ AddDoc AddDoc ] : 4 - would do 8 addDoc in parallel, spawning 8 threads at once. +
    Example - { AddDoc } : 30 - would do addDoc 30 times in a row. +
    Example - { AddDoc AddDoc } : 30 - would do addDoc 60 times in a row. +
  12. +
  13. + Command parameter: a command can take a single parameter. + If the certain command does not support a parameter, or if the parameter is of the wrong type, + reading the algorithm will fail with an exception and the test would not start. + Currently only AddDoc supports a (numeric) parameter, which indicates the required size of added document. + If the DocMaker implementation used in the test does not support makeDoc(size), an exception would be thrown and the test would fail. +
    Example - AddDoc(2000) - would add a document of size 2000 (~bytes). +
    See conf/task-sample.alg for how this can be used, for instance, to check which is faster, adding + many smaller documents, or few larger documents. + Next candidates for supporting a parameter may be the Search tasks, for controlling the query size. +
  14. +
  15. + Statistic recording elimination: - a sequence can also end with '>', + in which case child tasks would not store their statistics. + This can be useful to avoid exploding stats data, for adding say 1M docs. +
    Example - { "ManyAdds" AddDoc > : 1000000 - + would add million docs, measure that total, but not save stats for each addDoc. +
    Notice that the granularity of System.currentTimeMillis() (which is used here) is system dependent, + and in some systems an operation that takes 5 ms to complete may show 0 ms latency time in performance measurements. + Therefore it is sometimes more accurate to look at the elapsed time of a larger sequence, as demonstrated here. +
  16. +
  17. + Rate: + To set a rate (ops/sec or ops/min) for a sequence, add ': N : R' just after sequence closing tag. + This would specify repetition of N with rate of R operations/sec. + Use 'R/sec' or 'R/min' + to explicitly specify that the rate is per second or per minute. + The default is per second. +
    Example - [ AddDoc ] : 400 : 3 - would do 400 addDoc in parallel, starting up to 3 threads per second. +
    Example - { AddDoc } : 100 : 200/min - would do 100 addDoc serially, + waiting before starting next add, if otherwise rate would exceed 200 adds/min. +
  18. +
  19. + Command names: Each class "AnyNameTask" in the package org.apache.lucene.benchmark.byTask.tasks, + that extends PerfTask, is supported as command "AnyName" that can be + used in the benchmark "algorithm" description. + This allows adding new commands by just adding such classes. +
  20. +
+ + + +

Supported tasks/commands

+ +

+Existing tasks can be divided into a few groups: +regular index/search work tasks, report tasks, and control tasks. +

+ +
    + +
  1. + Report tasks: There are a few Report commands for generating reports. + Only task runs that were completed are reported. + (The 'Report tasks' themselves are not measured and not reported.) +
      +
    • + RepAll - all (completed) task runs. +
    • +
    • + RepSumByName - all statistics, aggregated by name. So, if AddDoc was executed 2000 times, + only 1 report line would be created for it, aggregating all those 2000 statistic records. +
    • +
    • + RepSelectByPref   prefixWord - all records for tasks whose name start with prefixWord. +
    • +
    • + RepSumByPref   prefixWord - all records for tasks whose name start with prefixWord, + aggregated by their full task name. +
    • +
    • + RepSumByNameRound - all statistics, aggregated by name and by Round. + So, if AddDoc was executed 2000 times in each of 3 rounds, 3 report lines would be created for it, + aggregating all those 2000 statistic records in each round. See more about rounds in the NewRound command description below. +
    • +
    • + RepSumByPrefRound   prefixWord - similar to RepSumByNameRound, + just that only tasks whose name starts with prefixWord are included. +
    • +
    + If needed, additional reports can be added by extending the abstract class ReportTask, and by + manipulating the statistics data in Points and TaskStats. +
  2. + +
  3. Control tasks: A few of the tasks control the overall behavior of the benchmark algorithm: +
      +
    • + ClearStats - clears the entire statistics. + Further reports would only include task runs that would start after this call. +
    • +
    • + NewRound - virtually start a new round of performance test. + Although this command can be placed anywhere, it mostly makes sense at the end of an outermost sequence. +
      This increments a global "round counter". All task runs that would start now would + record the new, updated round counter as their round number. This would appear in reports. + In particular, see RepSumByNameRound above. +
      An additional effect of NewRound, is that numeric and boolean properties defined in the + .properties file as a sequence of values, e.g. merge.factor=mrg.10.100.10.100 would + increment (cyclic) to the next value. + Note: this would also be reflected in the reports, in this case under a column that would be named "mrg". +
    • +
    • + ResetInputs - DocMaker and the various QueryMakers + would reset their counters to start. + The way these Maker interfaces work, each call for makeDocument() + or makeQuery() creates the next document or query + that it "knows" to create. + If that pool is "exhausted", the "maker" starts over again. The ResetInputs command + therefore makes it possible to make the rounds comparable. + It is therefore useful to invoke ResetInputs together with NewRound. +
    • +
    • + ResetSystemErase - reset all index and input data and call gc. + Does NOT reset statistics. This contains ResetInputs. + All writers/readers are nullified, deleted, closed. + Index is erased. + Directory is erased. + You would have to call CreateIndex once this was called... +
    • +
    • + ResetSystemSoft - reset all index and input data and call gc. + Does NOT reset statistics. This contains ResetInputs. + All writers/readers are nullified, closed. + Index is NOT erased. + Directory is NOT erased. + This is useful for testing performance on an existing index, for instance if the construction of a large index + took a very long time and now you would like to test its search or update performance. +
    • +
    +
  4. + +
  5. + Other existing tasks are quite straightforward and would just be briefly described here. +
      +
    • + CreateIndex and OpenIndex both leave the index open for later update operations. + CloseIndex would close it. +
    • +
    • + OpenReader, similarly, would leave an index reader open for later search operations. + But this has further semantics. + If a Read operation is performed, and an open reader exists, it would be used. + Otherwise, the read operation would open its own reader and close it when the read operation is done. + This allows testing various scenarios - sharing a reader, searching with "cold" reader, with "warmed" reader, etc. + The read operations affected by this are: Warm, + Search, SearchTrav (search and traverse), + and SearchTravRet (search and traverse and retrieve). + Notice that each of the 3 search task types maintains its own queryMaker instance. +
    • +
    +
+ + +

Benchmark properties

+ +

+Properties are read from the .properties file, and +define several parameters of the performance test. +As mentioned above for the NewRound task, +numeric and boolean properties that are defined as a sequence +of values, e.g. merge.factor=mrg.10.100.10.100 +would increment (cyclic) to the next value, when NewRound is called, and would also +appear as a named column in the reports (column name would be "mrg" in this example). +

+ +

+Some of the currently defined properties are: +

+ +
    +
  1. + analyzer - full class name for the analyzer to use. + Same analyzer would be used in the entire test. +
  2. + +
  3. + directory - valid values are FSDirectory and RAMDirectory. + This tells which directory to use for the performance test. +
  4. + +
  5. + Index work parameters: + Multi int/boolean values would be iterated with calls to NewRound. + They would also be added as columns in the reports; the first string in the + sequence is the column name. + (Make sure it is no shorter than any value in the sequence). +
      +
    • max.buffered +
      Example: max.buffered=buf.10.10.100.100 - + this would define using maxBufferedDocs of 10 in iterations 0 and 1, + and 100 in iterations 2 and 3. +
    • +
    • + merge.factor - which + merge factor to use. +
    • +
    • + compound - whether the index is + using the compound format or not. Valid values are "true" and "false". +
    • +
    +
+ +

+For additional defined properties see the task*.properties files under conf. +

+ + +

Example input algorithm and the result benchmark report

+

+The following example is in conf/task-sample.alg: +

+# --------------------------------------------------------
+#
+# Sample: what is the effect of doc size on indexing time?
+#
+# There are two parts in this test:
+# - PopulateShort adds 2N documents of length  L
+# - PopulateLong  adds  N documents of length 2L
+# Which one would be faster?
+# The comparison is done twice.
+#
+# --------------------------------------------------------
+
+{
+
+    { "PopulateShort"
+        CreateIndex
+        { AddDoc(4000) > : 20000
+        Optimize
+        CloseIndex
+    >
+
+    ResetSystemErase
+
+    { "PopulateLong"
+        CreateIndex
+        { AddDoc(8000) > : 10000
+        Optimize
+        CloseIndex
+    >
+
+    ResetSystemErase
+
+} : 2
+
+RepSelectByPref Populate
+
+

+

+The output report from running this test is the following: +

+Operation     round cmpnd buf mrg   runCnt   recsPerRun        rec/s  elapsedSec    avgUsedMem    avgTotalMem
+PopulateShort     0  true  10  10        1        20003        106.2      188.36     1,664,232      4,194,304
+PopulateLong -  - 0  true  10  10 -  -   1 -  -   10003 -  -  - 89.6 -  - 111.69 -   2,257,112 -  - 4,194,304
+PopulateShort     0  true  10  10        1        20003        107.5      186.14     2,972,088      4,194,304
+PopulateLong -  - 0  true  10  10 -  -   1 -  -   10003 -  -  - 85.9 -  - 116.42 -   2,980,024 -  - 4,194,304
+
+

+
+
 
+ + Index: src/java/org/apache/lucene/benchmark/byTask/Benchmark.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (revision 0) @@ -0,0 +1,120 @@ +package org.apache.lucene.benchmark.byTask; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.FileInputStream; +import java.util.Iterator; +import java.util.Properties; + +import org.apache.lucene.benchmark.byTask.utils.Algorithm; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Run the benchmark algorithm. + *

Usage: java Benchmark properties-file algorithm-file + *

    + *
  1. Read config file. + *
  2. Read algorithm. + *
  3. Run the algorithm. + *
+ */ +public class Benchmark { + + /** + * Run the benchmark algorithm. + * @param args benchmark config and algorithm files + */ + public static void main(String[] args) { + // verify command line args + if (args.length < 2) { + System.err.println("Usage: java Benchmark "); + System.exit(1); + } + + // verify input files + File propFile = new File(args[0]); + File algFile = new File(args[1]); + if (!propFile.exists() || !propFile.isFile() || !propFile.canRead()) { + System.err.println("Error: cannot find/read config file: "+propFile.getAbsolutePath()); + System.exit(1); + } + if (!algFile.exists() || !algFile.isFile() || !algFile.canRead()) { + System.err.println("cannot find/read algorithm file: "+algFile.getAbsolutePath()); + System.exit(1); + } + + // read propetries + Properties props = new Properties(); + try { + props.load(new FileInputStream(propFile)); + } catch (Exception e) { + System.err.println("Error: cannot load configuration from file: "+propFile.getAbsolutePath()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("------------> config properties:"); + for (Iterator it = props.keySet().iterator(); it.hasNext();) { + String propName = (String) it.next(); + System.out.println(propName + " = " + props.getProperty(propName)); + } + System.out.println(); + + // last preparations + PerfRunData runData = null; + try { + runData = new PerfRunData(new Config(props)); + } catch (Exception e) { + System.err.println("Error: cannot init PerfRunData: "+e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("------------> queries:"); + System.out.println(runData.getSearchQueryMaker().printQueries()); + + // parse algorithm + Algorithm algorithm = null; + try { + algorithm = new Algorithm(algFile, runData); + } catch (Exception e) { + System.err.println("Error: cannot understand algorithm from file: "+algFile.getAbsolutePath()); + e.printStackTrace(); + System.exit(1); + } + + System.out.println("------------> 
algorithm:"); + System.out.println(algorithm.toString()); + + // execute + try { + algorithm.execute(); + } catch (Exception e) { + System.err.println("Error: cannot execute the algorithm! "+e.getMessage()); + e.printStackTrace(); + } + + System.out.println("####################"); + System.out.println("### D O N E !!! ###"); + System.out.println("####################"); + + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (revision 0) @@ -0,0 +1,156 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * A DocMaker using the Reuters collection for its input. + */ +public class ReutersDocMaker extends SimpleDocMaker { + + private DateFormat dateFormat; + private File dataDir = null; + private ArrayList txtFiles = new ArrayList(); + private int nextFile = 0; + private int round=0; + private int count = 0; + + /* (non-Javadoc) + * @see SimpleDocMaker#setConfig(java.util.Properties) + */ + public void setConfig(Config config) { + super.setConfig(config); + String d = config.get("docs.dir","reuters-out"); + dataDir = new File(new File("work"),d); + addFiles(dataDir); + if (txtFiles.size()==0) { + throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); + } + // date format: 30-MAR-1987 14:22:36.87 + dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS"); + dateFormat.setLenient(true); + } + + private void addFiles(File f) { + if (!f.canRead()) { + return; + } + if (f.isDirectory()) { + File files[] = f.listFiles(); + for (int i = 0; i < files.length; i++) { + addFiles(files[i]); + } + return; + } + txtFiles.add(f); + addUniqueBytes(f.length()); + } + + /* (non-Javadoc) + * @see SimpleDocMaker#makeDocument() + */ + public Document makeDocument() throws Exception { + File f = null; + String name = null; + synchronized (this) { + f = (File) txtFiles.get(nextFile++); + name = f.getCanonicalPath()+"_"+round; + if (nextFile >= txtFiles.size()) { + // exhausted files, start a new round + nextFile = 0; + round++; + } + } + + Document doc = new Document(); + doc.add(new Field("name",name,storeVal,indexVal,termVecVal)); + BufferedReader reader 
= new BufferedReader(new FileReader(f)); + String line = null; + //First line is the date, 3rd is the title, rest is body + String dateStr = reader.readLine(); + reader.readLine();//skip an empty line + String title = reader.readLine(); + reader.readLine();//skip an empty line + StringBuffer body = new StringBuffer(1024); + while ((line = reader.readLine()) != null) { + body.append(line).append(' '); + } + Date date = dateFormat.parse(dateStr.trim()); + doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), + Field.Store.YES, Field.Index.UN_TOKENIZED)); + + if (title != null) { + doc.add(new Field("title", title, storeVal,indexVal,termVecVal)); + } + if (body.length() > 0) { + doc.add(new Field("body", body.toString(), storeVal,indexVal,termVecVal)); + } + + count++; + addBytes(f.length()); + + return doc; + } + + /* + * (non-Javadoc) + * @see DocMaker#resetIinputs() + */ + public synchronized void resetInputs() { + super.resetInputs(); + nextFile = 0; + round = 0; + count = 0; + } + + /* + * (non-Javadoc) + * @see DocMaker#numUniqueTexts() + */ + public int numUniqueTexts() { + return txtFiles.size(); + } + + /* + * (non-Javadoc) + * @see DocMaker#getCount() + */ + public int getCount() { + return count; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int) + */ + public Document makeDocument(int size) throws Exception { + throw new Exception(this+".makeDocument (int size) is not supported!"); + } +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (revision 0) @@ -0,0 +1,159 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.spans.SpanFirstQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for + * searching in the Reuters collection. 
+ */ +public class ReutersQueryMaker implements QueryMaker { + + private int qnum = 0; + private Query queries[]; + private Config config; + + private static String [] STANDARD_QUERIES = { + //Start with some short queries + "Salomon", "Comex", "night trading", "Japan Sony", + //Try some Phrase Queries + "\"Sony Japan\"", "\"food needs\"~3", + "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria", + "\"Ford Credit\"~5", + //Try some longer queries + "airline Europe Canada destination", + "Long term pressure by trade " + + "ministers is necessary if the current Uruguay round of talks on " + + "the General Agreement on Trade and Tariffs (GATT) is to " + + "succeed" + }; + + private static Query[] getPrebuiltQueries(String field) { + // be wary of unanalyzed text + return new Query[] { + new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5), + new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false), + new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false), + new WildcardQuery(new Term(field, "fo*")), + }; + } + + /** + * Parse the strings containing Lucene queries. 
+ * + * @param qs array of strings containing query expressions + * @param a analyzer to use when parsing queries + * @return array of Lucene queries + */ + private static Query[] createQueries(List qs, Analyzer a) { + QueryParser qp = new QueryParser("body", a); + List queries = new ArrayList(); + for (int i = 0; i < qs.size(); i++) { + try { + + Object query = qs.get(i); + Query q = null; + if (query instanceof String) { + q = qp.parse((String) query); + + } else if (query instanceof Query) { + q = (Query) query; + + } else { + System.err.println("Unsupported Query Type: " + query); + } + + if (q != null) { + queries.add(q); + } + + } catch (Exception e) { + e.printStackTrace(); + } + } + + return (Query[]) queries.toArray(new Query[0]); + } + + private void prepareQueries() throws Exception { + // analyzer (default is standard analyzer) + Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer", + "org.apache.lucene.analysis.StandardAnalyzer")).newInstance(); + + List queryList = new ArrayList(20); + queryList.addAll(Arrays.asList(STANDARD_QUERIES)); + queryList.addAll(Arrays.asList(getPrebuiltQueries("body"))); + queries = createQueries(queryList, anlzr); + } + + public Query makeQuery() throws Exception { + return queries[nextQnum()]; + } + + public void setConfig(Config config) throws Exception { + this.config = config; + prepareQueries(); + } + + public void resetInputs() { + qnum = 0; + } + + // return next qnum + private synchronized int nextQnum() { + int res = qnum; + qnum = (qnum+1) % queries.length; + return res; + } + + public String printQueries() { + String newline = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(); + if (queries != null) { + for (int i = 0; i < queries.length; i++) { + sb.append(i+". 
"+queries[i].getClass().getSimpleName()+" - "+queries[i].toString()); + sb.append(newline); + } + } + return sb.toString(); + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int) + */ + public Query makeQuery(int size) throws Exception { + throw new Exception(this+".makeQuery(int size) is not supported!"); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (revision 0) @@ -0,0 +1,226 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.Format; + + +/** + * Create documents for the test + */ +public class SimpleDocMaker implements DocMaker { + + private int docID = 0; + private long numBytes = 0; + private long numUniqueBytes = 0; + + protected Config config; + private int nextDocTextPosition = 0; // for creating docs of fixed size. + + protected Field.Store storeVal = Field.Store.NO; + protected Field.Index indexVal = Field.Index.TOKENIZED; + protected Field.TermVector termVecVal = Field.TermVector.NO; + + private static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org + "Well it may be a little dramatic but sometimes it true. " + + "If you call the emergency medical services to an incident, " + + "your actions have started the chain of survival. " + + "You have acted to help someone you may not even know. " + + "First aid is helping, first aid is making that call, " + + "putting a Band-Aid on a small wound, controlling bleeding in large " + + "wounds or providing CPR for a collapsed person whose not breathing " + + "and heart has stopped beating. 
You can help yourself, your loved " + + "ones and the stranger whose life may depend on you being in the " + + "right place at the right time with the right knowledge."; + + private static int DOC_TEXT_LENGTH = DOC_TEXT.length(); + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument() + */ + public Document makeDocument () throws Exception { + return makeDocument(0); + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int) + */ + public Document makeDocument(int size) throws Exception { + int docid = newdocid(); + Document doc = new Document(); + doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal)); + String docText = createDocText(size); + doc.add(new Field("body", "synthetic body text"+docid+" "+docText, storeVal, indexVal, termVecVal)); + addBytes(docText.length()); // should multiply by 2 here? + return doc; + } + + private synchronized int[] nextDocText(int fixedDocSize) { + int from = nextDocTextPosition; + int to = nextDocTextPosition; + int wraps = 0; + int size = 0; + + while (size 0) { + sb.append(DOC_TEXT.substring(from)); + from = 0; + } + sb.append(DOC_TEXT.substring(from,to)); + return sb.toString(); + } + + // return a new docid + private synchronized int newdocid() { + return docID++; + } + + /* (non-Javadoc) + * @see DocMaker#setConfig(java.util.Properties) + */ + public void setConfig(Config config) { + this.config = config; + boolean stored = config.get("stored",false); + boolean tokenized = config.get("tokenized",true); + boolean termVec = config.get("term.vector",false); + storeVal = (stored ? Field.Store.YES : Field.Store.NO); + indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED); + termVecVal = (termVec ? 
Field.TermVector.YES : Field.TermVector.NO); + } + + /* + * (non-Javadoc) + * @see DocMaker#resetIinputs() + */ + public synchronized void resetInputs() { + printDocStatistics(); + docID = 0; + numBytes = 0; + } + + /* + * (non-Javadoc) + * @see DocMaker#numUniqueTexts() + */ + public int numUniqueTexts() { + return 0; // not applicable + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes() + */ + public long numUniqueBytes() { + return numUniqueBytes; + } + + /* + * (non-Javadoc) + * @see DocMaker#getCount() + */ + public int getCount() { + return docID; + } + + /* + * (non-Javadoc) + * @see DocMaker#getByteCount() + */ + public long getByteCount() { + return numBytes; + } + + protected void addUniqueBytes (long n) { + numUniqueBytes += n; + } + + protected void addBytes (long n) { + numBytes += n; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics() + */ + private int lastPrintedNumUniqueTexts = 0; + private long lastPrintedNumUniqueBytes = 0; + private int printNum = 0; + public void printDocStatistics() { + boolean print = false; + String col = " "; + StringBuffer sb = new StringBuffer(); + String newline = System.getProperty("line.separator"); + sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); + int nut = numUniqueTexts(); + if (nut > lastPrintedNumUniqueTexts) { + print = true; + sb.append("total bytes of unique texts: ").append(Format.format(0,nut,col)).append(newline); + lastPrintedNumUniqueTexts = nut; + } + long nub = numUniqueBytes(); + if (nub > lastPrintedNumUniqueBytes) { + print = true; + sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline); + lastPrintedNumUniqueBytes = nub; + } + if (getCount()>0) { + print = true; + sb.append("num files added since last inputs reset: 
").append(Format.format(0,getCount(),col)).append(newline); + sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline); + } + if (print) { + System.out.println(sb.append(newline).toString()); + printNum++; + } + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (revision 0) @@ -0,0 +1,103 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.BooleanClause.Occur; + + +public class SimpleQueryMaker implements QueryMaker { + + private int qnum = 0; + private Query queries[]; + private Config config; + + private void prepareQueries() throws Exception { + // analyzer (default is standard analyzer) + Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer", + "org.apache.lucene.analysis.StandardAnalyzer")).newInstance(); + + QueryParser qp = new QueryParser("body",anlzr); + ArrayList qq = new ArrayList(); + Query q1 = new TermQuery(new Term("docid","doc2")); + qq.add(q1); + Query q2 = new TermQuery(new Term("body","simple")); + qq.add(q2); + BooleanQuery bq = new BooleanQuery(); + bq.add(q1,Occur.MUST); + bq.add(q2,Occur.MUST); + qq.add(bq); + qq.add(qp.parse("synthetic body")); + qq.add(qp.parse("\"synthetic body\"")); + qq.add(qp.parse("synthetic text")); + qq.add(qp.parse("\"synthetic text\"")); + qq.add(qp.parse("\"synthetic text\"~3")); + qq.add(qp.parse("zoom*")); + qq.add(qp.parse("synth*")); + queries = (Query []) qq.toArray(new Query[0]); + } + + public Query makeQuery() throws Exception { + return queries[nextQnum()]; + } + + public void setConfig(Config config) throws Exception { + this.config = config; + prepareQueries(); + } + + public void resetInputs() { + qnum = 0; + } + + // return next qnum + private synchronized int nextQnum() { + int res = qnum; + qnum = (qnum+1) % queries.length; + return res; + } + + public String printQueries() { + String newline = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(); + if (queries != null) { + for (int i = 0; i < queries.length; i++) { 
+ sb.append(i+". "+queries[i].getClass().getSimpleName()+" - "+queries[i].toString()); + sb.append(newline); + } + } + return sb.toString(); + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int) + */ + public Query makeQuery(int size) throws Exception { + throw new Exception(this+".makeQuery(int size) is not supported!"); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 0) @@ -0,0 +1,64 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Create documents for the test. + *
Each call to makeDocument would create the next document. + * When input is exhausted, the DocMaker iterates over the input again, + * thus providing a source for an unlimited number of documents, + * though not all of them are unique. + */ +public interface DocMaker { + + /** + * Create the next document, of the given size by input bytes. + * If the implementation does not support control over size, an exception is thrown. + * @param size size of document, or 0 if there is no size requirement. + * @exception if cannot make the document, or if size>0 was specified but this feature is not supported. + */ + public Document makeDocument (int size) throws Exception; + + /** Create the next document. */ + public Document makeDocument () throws Exception; + + /** Set the properties */ + public void setConfig (Config config); + + /** Reset inputs so that the test run would behave, input wise, as if it just started. */ + public void resetInputs(); + + /** Return how many real unique texts are available, 0 if not applicable. */ + public int numUniqueTexts(); + + /** Return total bytes of all available unique texts, 0 if not applicable */ + public long numUniqueBytes(); + + /** Return number of docs made since last reset. */ + public int getCount(); + + /** Return total byte size of docs made since last reset. */ + public long getByteCount(); + + /** Print some statistics on docs available/added/etc. */ + public void printDocStatistics(); +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (revision 0) @@ -0,0 +1,49 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.search.Query; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Create queries for the test. + */ +public interface QueryMaker { + + /** + * Create the next query, of the given size. + * @param size the size of the query - number of terms, etc. + * @exception if cannot make the query, or if size>0 was specified but this feature is not supported. + */ + public Query makeQuery (int size) throws Exception; + + /** Create the next query */ + public Query makeQuery () throws Exception; + + /** Set the properties + * @throws Exception */ + public void setConfig (Config config) throws Exception; + + /** Reset inputs so that the test run would behave, input wise, as if it just started. */ + public void resetInputs(); + + /** Print the queries */ + public String printQueries(); +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/package.html (revision 0) @@ -0,0 +1,6 @@ + + +Sources for benchmark inputs: documents and queries. 
+ + + Index: src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 0) @@ -0,0 +1,64 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Create documents for the test. + *
Each call to makeDocument would create the next document. + * When input is exhausted, the DocMaker iterates over the input again, + * thus providing a source for an unlimited number of documents, + * though not all of them are unique. + */ +public interface DocMaker { + + /** + * Create the next document, of the given size by input bytes. + * If the implementation does not support control over size, an exception is thrown. + * @param size size of document, or 0 if there is no size requirement. + * @exception if cannot make the document, or if size>0 was specified but this feature is not supported. + */ + public Document makeDocument (int size) throws Exception; + + /** Create the next document. */ + public Document makeDocument () throws Exception; + + /** Set the properties */ + public void setConfig (Config config); + + /** Reset inputs so that the test run would behave, input wise, as if it just started. */ + public void resetInputs(); + + /** Return how many real unique texts are available, 0 if not applicable. */ + public int numUniqueTexts(); + + /** Return total bytes of all available unique texts, 0 if not applicable */ + public long numUniqueBytes(); + + /** Return number of docs made since last reset. */ + public int getCount(); + + /** Return total byte size of docs made since last reset. */ + public long getByteCount(); + + /** Print some statistics on docs available/added/etc. */ + public void printDocStatistics(); +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/package.html (revision 0) @@ -0,0 +1,6 @@ + + +Sources for benchmark inputs: documents and queries. 
+ + + Index: src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/QueryMaker.java (revision 0) @@ -0,0 +1,49 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.search.Query; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Create queries for the test. + */ +public interface QueryMaker { + + /** + * Create the next query, of the given size. + * @param size the size of the query - number of terms, etc. + * @exception if cannot make the query, or if size>0 was specified but this feature is not supported. + */ + public Query makeQuery (int size) throws Exception; + + /** Create the next query */ + public Query makeQuery () throws Exception; + + /** Set the properties + * @throws Exception */ + public void setConfig (Config config) throws Exception; + + /** Reset inputs so that the test run would behave, input wise, as if it just started. 
*/ + public void resetInputs(); + + /** Print the queries */ + public String printQueries(); +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (revision 0) @@ -0,0 +1,156 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * A DocMaker using the Reuters collection for its input. 
+ */ +public class ReutersDocMaker extends SimpleDocMaker { + + private DateFormat dateFormat; + private File dataDir = null; + private ArrayList txtFiles = new ArrayList(); + private int nextFile = 0; + private int round=0; + private int count = 0; + + /* (non-Javadoc) + * @see SimpleDocMaker#setConfig(java.util.Properties) + */ + public void setConfig(Config config) { + super.setConfig(config); + String d = config.get("docs.dir","reuters-out"); + dataDir = new File(new File("work"),d); + addFiles(dataDir); + if (txtFiles.size()==0) { + throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); + } + // date format: 30-MAR-1987 14:22:36.87 + dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS"); + dateFormat.setLenient(true); + } + + private void addFiles(File f) { + if (!f.canRead()) { + return; + } + if (f.isDirectory()) { + File files[] = f.listFiles(); + for (int i = 0; i < files.length; i++) { + addFiles(files[i]); + } + return; + } + txtFiles.add(f); + addUniqueBytes(f.length()); + } + + /* (non-Javadoc) + * @see SimpleDocMaker#makeDocument() + */ + public Document makeDocument() throws Exception { + File f = null; + String name = null; + synchronized (this) { + f = (File) txtFiles.get(nextFile++); + name = f.getCanonicalPath()+"_"+round; + if (nextFile >= txtFiles.size()) { + // exhausted files, start a new round + nextFile = 0; + round++; + } + } + + Document doc = new Document(); + doc.add(new Field("name",name,storeVal,indexVal,termVecVal)); + BufferedReader reader = new BufferedReader(new FileReader(f)); + String line = null; + //First line is the date, 3rd is the title, rest is body + String dateStr = reader.readLine(); + reader.readLine();//skip an empty line + String title = reader.readLine(); + reader.readLine();//skip an empty line + StringBuffer body = new StringBuffer(1024); + while ((line = reader.readLine()) != null) { + body.append(line).append(' '); + } + Date date = dateFormat.parse(dateStr.trim()); + 
doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), + Field.Store.YES, Field.Index.UN_TOKENIZED)); + + if (title != null) { + doc.add(new Field("title", title, storeVal,indexVal,termVecVal)); + } + if (body.length() > 0) { + doc.add(new Field("body", body.toString(), storeVal,indexVal,termVecVal)); + } + + count++; + addBytes(f.length()); + + return doc; + } + + /* + * (non-Javadoc) + * @see DocMaker#resetIinputs() + */ + public synchronized void resetInputs() { + super.resetInputs(); + nextFile = 0; + round = 0; + count = 0; + } + + /* + * (non-Javadoc) + * @see DocMaker#numUniqueTexts() + */ + public int numUniqueTexts() { + return txtFiles.size(); + } + + /* + * (non-Javadoc) + * @see DocMaker#getCount() + */ + public int getCount() { + return count; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int) + */ + public Document makeDocument(int size) throws Exception { + throw new Exception(this+".makeDocument (int size) is not supported!"); + } +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java (revision 0) @@ -0,0 +1,159 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.spans.SpanFirstQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * A QueryMaker tha makes queries devised manually (by Grant Ingersoll) for + * searching in the Reuters collection. 
+ */ +public class ReutersQueryMaker implements QueryMaker { + + private int qnum = 0; + private Query queries[]; + private Config config; + + private static String [] STANDARD_QUERIES = { + //Start with some short queries + "Salomon", "Comex", "night trading", "Japan Sony", + //Try some Phrase Queries + "\"Sony Japan\"", "\"food needs\"~3", + "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria", + "\"Ford Credit\"~5", + //Try some longer queries + "airline Europe Canada destination", + "Long term pressure by trade " + + "ministers is necessary if the current Uruguay round of talks on " + + "the General Agreement on Trade and Tariffs (GATT) is to " + + "succeed" + }; + + private static Query[] getPrebuiltQueries(String field) { + // be wary of unanalyzed text + return new Query[] { + new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5), + new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false), + new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false), + new WildcardQuery(new Term(field, "fo*")), + }; + } + + /** + * Parse the strings containing Lucene queries. 
+ * + * @param qs array of strings containing query expressions + * @param a analyzer to use when parsing queries + * @return array of Lucene queries + */ + private static Query[] createQueries(List qs, Analyzer a) { + QueryParser qp = new QueryParser("body", a); + List queries = new ArrayList(); + for (int i = 0; i < qs.size(); i++) { + try { + + Object query = qs.get(i); + Query q = null; + if (query instanceof String) { + q = qp.parse((String) query); + + } else if (query instanceof Query) { + q = (Query) query; + + } else { + System.err.println("Unsupported Query Type: " + query); + } + + if (q != null) { + queries.add(q); + } + + } catch (Exception e) { + e.printStackTrace(); + } + } + + return (Query[]) queries.toArray(new Query[0]); + } + + private void prepareQueries() throws Exception { + // analyzer (default is standard analyzer) + Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer", + "org.apache.lucene.analysis.StandardAnalyzer")).newInstance(); + + List queryList = new ArrayList(20); + queryList.addAll(Arrays.asList(STANDARD_QUERIES)); + queryList.addAll(Arrays.asList(getPrebuiltQueries("body"))); + queries = createQueries(queryList, anlzr); + } + + public Query makeQuery() throws Exception { + return queries[nextQnum()]; + } + + public void setConfig(Config config) throws Exception { + this.config = config; + prepareQueries(); + } + + public void resetInputs() { + qnum = 0; + } + + // return next qnum + private synchronized int nextQnum() { + int res = qnum; + qnum = (qnum+1) % queries.length; + return res; + } + + public String printQueries() { + String newline = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(); + if (queries != null) { + for (int i = 0; i < queries.length; i++) { + sb.append(i+". 
"+queries[i].getClass().getSimpleName()+" - "+queries[i].toString()); + sb.append(newline); + } + } + return sb.toString(); + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int) + */ + public Query makeQuery(int size) throws Exception { + throw new Exception(this+".makeQuery(int size) is not supported!"); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (revision 0) @@ -0,0 +1,226 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.Format; + + +/** + * Create documents for the test + */ +public class SimpleDocMaker implements DocMaker { + + private int docID = 0; + private long numBytes = 0; + private long numUniqueBytes = 0; + + protected Config config; + private int nextDocTextPosition = 0; // for creating docs of fixed size. + + protected Field.Store storeVal = Field.Store.NO; + protected Field.Index indexVal = Field.Index.TOKENIZED; + protected Field.TermVector termVecVal = Field.TermVector.NO; + + private static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org + "Well it may be a little dramatic but sometimes it true. " + + "If you call the emergency medical services to an incident, " + + "your actions have started the chain of survival. " + + "You have acted to help someone you may not even know. " + + "First aid is helping, first aid is making that call, " + + "putting a Band-Aid on a small wound, controlling bleeding in large " + + "wounds or providing CPR for a collapsed person whose not breathing " + + "and heart has stopped beating. 
You can help yourself, your loved " + + "ones and the stranger whose life may depend on you being in the " + + "right place at the right time with the right knowledge."; + + private static int DOC_TEXT_LENGTH = DOC_TEXT.length(); + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument() + */ + public Document makeDocument () throws Exception { + return makeDocument(0); + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int) + */ + public Document makeDocument(int size) throws Exception { + int docid = newdocid(); + Document doc = new Document(); + doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal)); + String docText = createDocText(size); + doc.add(new Field("body", "synthetic body text"+docid+" "+docText, storeVal, indexVal, termVecVal)); + addBytes(docText.length()); // should multiply by 2 here? + return doc; + } + + private synchronized int[] nextDocText(int fixedDocSize) { + int from = nextDocTextPosition; + int to = nextDocTextPosition; + int wraps = 0; + int size = 0; + + while (size 0) { + sb.append(DOC_TEXT.substring(from)); + from = 0; + } + sb.append(DOC_TEXT.substring(from,to)); + return sb.toString(); + } + + // return a new docid + private synchronized int newdocid() { + return docID++; + } + + /* (non-Javadoc) + * @see DocMaker#setConfig(java.util.Properties) + */ + public void setConfig(Config config) { + this.config = config; + boolean stored = config.get("stored",false); + boolean tokenized = config.get("tokenized",true); + boolean termVec = config.get("term.vector",false); + storeVal = (stored ? Field.Store.YES : Field.Store.NO); + indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED); + termVecVal = (termVec ? 
Field.TermVector.YES : Field.TermVector.NO); + } + + /* + * (non-Javadoc) + * @see DocMaker#resetIinputs() + */ + public synchronized void resetInputs() { + printDocStatistics(); + docID = 0; + numBytes = 0; + } + + /* + * (non-Javadoc) + * @see DocMaker#numUniqueTexts() + */ + public int numUniqueTexts() { + return 0; // not applicable + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes() + */ + public long numUniqueBytes() { + return numUniqueBytes; + } + + /* + * (non-Javadoc) + * @see DocMaker#getCount() + */ + public int getCount() { + return docID; + } + + /* + * (non-Javadoc) + * @see DocMaker#getByteCount() + */ + public long getByteCount() { + return numBytes; + } + + protected void addUniqueBytes (long n) { + numUniqueBytes += n; + } + + protected void addBytes (long n) { + numBytes += n; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics() + */ + private int lastPrintedNumUniqueTexts = 0; + private long lastPrintedNumUniqueBytes = 0; + private int printNum = 0; + public void printDocStatistics() { + boolean print = false; + String col = " "; + StringBuffer sb = new StringBuffer(); + String newline = System.getProperty("line.separator"); + sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); + int nut = numUniqueTexts(); + if (nut > lastPrintedNumUniqueTexts) { + print = true; + sb.append("total bytes of unique texts: ").append(Format.format(0,nut,col)).append(newline); + lastPrintedNumUniqueTexts = nut; + } + long nub = numUniqueBytes(); + if (nub > lastPrintedNumUniqueBytes) { + print = true; + sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline); + lastPrintedNumUniqueBytes = nub; + } + if (getCount()>0) { + print = true; + sb.append("num files added since last inputs reset: 
").append(Format.format(0,getCount(),col)).append(newline); + sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline); + } + if (print) { + System.out.println(sb.append(newline).toString()); + printNum++; + } + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java (revision 0) @@ -0,0 +1,103 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.BooleanClause.Occur; + + +public class SimpleQueryMaker implements QueryMaker { + + private int qnum = 0; + private Query queries[]; + private Config config; + + private void prepareQueries() throws Exception { + // analyzer (default is standard analyzer) + Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer", + "org.apache.lucene.analysis.StandardAnalyzer")).newInstance(); + + QueryParser qp = new QueryParser("body",anlzr); + ArrayList qq = new ArrayList(); + Query q1 = new TermQuery(new Term("docid","doc2")); + qq.add(q1); + Query q2 = new TermQuery(new Term("body","simple")); + qq.add(q2); + BooleanQuery bq = new BooleanQuery(); + bq.add(q1,Occur.MUST); + bq.add(q2,Occur.MUST); + qq.add(bq); + qq.add(qp.parse("synthetic body")); + qq.add(qp.parse("\"synthetic body\"")); + qq.add(qp.parse("synthetic text")); + qq.add(qp.parse("\"synthetic text\"")); + qq.add(qp.parse("\"synthetic text\"~3")); + qq.add(qp.parse("zoom*")); + qq.add(qp.parse("synth*")); + queries = (Query []) qq.toArray(new Query[0]); + } + + public Query makeQuery() throws Exception { + return queries[nextQnum()]; + } + + public void setConfig(Config config) throws Exception { + this.config = config; + prepareQueries(); + } + + public void resetInputs() { + qnum = 0; + } + + // return next qnum + private synchronized int nextQnum() { + int res = qnum; + qnum = (qnum+1) % queries.length; + return res; + } + + public String printQueries() { + String newline = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(); + if (queries != null) { + for (int i = 0; i < queries.length; i++) { 
+ sb.append(i+". "+queries[i].getClass().getSimpleName()+" - "+queries[i].toString()); + sb.append(newline); + } + } + return sb.toString(); + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.feeds.QueryMaker#makeQuery(int) + */ + public Query makeQuery(int size) throws Exception { + throw new Exception(this+".makeQuery(int size) is not supported!"); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/package.html (revision 0) @@ -0,0 +1,432 @@ + + + + Benchmarking Lucene By Tasks + + +
+Benchmarking Lucene By Tasks. +

+This package provides "task based" performance benchmarking of Lucene. +One can use the predefined benchmarks, or create new ones. +

+

+Contained packages: +

+ + + + + + + + + + + + + + + + + + + + + + +
PackageDescription
statsStatistics maintained when running benchmark tasks.
tasksBenchmark tasks.
feedsSources for benchmark inputs: documents and queries.
utilsUtilities used for the benchmark, and for the reports.
+ +

Table Of Contents

+

+

    +
  1. Benchmarking By Tasks
  2. +
  3. How to use
  4. +
  5. Benchmark "algorithm"
  6. +
  7. Supported tasks/commands
  8. +
  9. Benchmark properties
  10. +
  11. Example input algorithm and the result benchmark report.
  12. +
+

+ +

Benchmarking By Tasks

+

+Benchmark Lucene using task primitives. +

+ +

+A benchmark is composed of some predefined tasks, allowing for creating an index, adding documents, +optimizing, searching, generating reports, and more. A benchmark run takes an "algorithm" file +that describes the sequence of tasks making up the run, and a properties file defining a few +additional characteristics of the benchmark run. +

+ + +

How to use

+

+Predefined benchmarks are run using the predefined ant tasks: +

    +
  • ant run-task-standard +
  • ant run-task-micro-standard +
+ +

+You can create your own benchmark by modifying one of the predefined .alg and .properties +files and using the appropriate ant target, or by providing your own .alg and .properties files. +In this case, you should run the class org.apache.lucene.benchmark.byTask.Benchmark and provide +the two arguments: file.properties file.alg. +

+

+It is very likely that this would be sufficient for defining the benchmark you need, +otherwise, you can extend the framework to meet your needs, as explained herein. +

+ +

+Each benchmark run has a DocMaker and a QueryMaker. These two should usually match, so +that "meaningful" queries are used for a certain collection. You can modify +the properties file to define which "makers" should be used. You can also +specify your own, implementing the DocMaker and QueryMaker interfaces. +

+ +

+Benchmark .alg file contains the benchmark "algorithm". The syntax is described below. +Within the algorithm, you can specify groups of commands, assign them names, specify commands that should be repeated, +do commands in serial or in parallel, and also control the speed of "firing" the commands. +

+

+This allows, for instance, to specify +that an index should be opened for update, +documents should be added to it one by one but not faster than 20 docs a minute, +and, in parallel with this, +some N queries should be searched against that index, +again, no more than 2 queries a second. +You can have the searches all share an index searcher, +or have them each open its own searcher and close it afterwards. +

+ +

+If the commands available for use in the algorithm do not meet your needs, +you can add commands by adding a new task under +org.apache.lucene.benchmark.byTask.tasks - +you should extend the PerfTask abstract class. +Make sure that your new task class name is suffixed by Task. +Assume you added the class "WonderfulTask" - doing so also enables the +command "Wonderful" to be used in the algorithm. +

+ + +

Benchmark "algorithm"

+ +

+The following is an informal description of the supported syntax. +

+ +
    +
  1. + Measuring: When a command is executed, statistics for the elapsed execution time and memory consumption are collected. + At any time, those statistics can be printed, using one of the available ReportTasks. +
  2. +
  3. + Comments start with '#'. +
  4. +
  5. + Serial sequences are enclosed within '{ }'. +
  6. +
  7. + Parallel sequences are enclosed within '[ ]' +
  8. +
  9. + Sequence naming: To name a sequence, put '"name"' just after '{' or '['. +
    Example - { "ManyAdds" AddDoc } : 1000000 - would + name the sequence of 1M add docs "ManyAdds", and this name would later appear in statistic reports. + If you don't specify a name for a sequence, it is given one: you can see it as the + algorithm is printed just before benchmark execution starts. +
  10. +
  11. + Repeating: + To repeat sequence tasks N times, add ': N' just after the + sequence closing tag - '}' or ']' or '>'. +
    Example - [ AddDoc ] : 4 - would do 4 addDoc in parallel, spawning 4 threads at once. +
    Example - [ AddDoc AddDoc ] : 4 - would do 8 addDoc in parallel, spawning 8 threads at once. +
    Example - { AddDoc } : 30 - would do addDoc 30 times in a row. +
    Example - { AddDoc AddDoc } : 30 - would do addDoc 60 times in a row. +
  12. +
  13. + Command parameter: a command can take a single parameter. + If the certain command does not support a parameter, or if the parameter is of the wrong type, + reading the algorithm will fail with an exception and the test would not start. + Currently only AddDoc supports a (numeric) parameter, which indicates the required size of added document. + If the DocMaker implementation used in the test does not support makeDoc(size), an exception would be thrown and the test would fail. +
    Example - AddDoc(2000) - would add a document of size 2000 (~bytes). +
    See conf/task-sample.alg for how this can be used, for instance, to check which is faster, adding + many smaller documents, or few larger documents. + Next candidates for supporting a parameter may be the Search tasks, for controlling the query size. +
  14. +
  15. + Statistic recording elimination: - a sequence can also end with '>', + in which case child tasks would not store their statistics. + This can be useful to avoid exploding stats data, for adding say 1M docs. +
    Example - { "ManyAdds" AddDoc > : 1000000 - + would add million docs, measure that total, but not save stats for each addDoc. +
    Notice that the granularity of System.currentTimeMillis() (which is used here) is system dependent, + and in some systems an operation that takes 5 ms to complete may show 0 ms latency time in performance measurements. + Therefore it is sometimes more accurate to look at the elapsed time of a larger sequence, as demonstrated here. +
  16. +
  17. + Rate: + To set a rate (ops/sec or ops/min) for a sequence, add ': N : R' just after sequence closing tag. + This would specify repetition of N with rate of R operations/sec. + Use 'R/sec' or 'R/min' + to explicitly specify that the rate is per second or per minute. + The default is per second. +
    Example - [ AddDoc ] : 400 : 3 - would do 400 addDoc in parallel, starting up to 3 threads per second. +
    Example - { AddDoc } : 100 : 200/min - would do 100 addDoc serially, + waiting before starting next add, if otherwise rate would exceed 200 adds/min. +
  18. +
  19. + Command names: Each class "AnyNameTask" in the package org.apache.lucene.benchmark.byTask.tasks, + that extends PerfTask, is supported as command "AnyName" that can be + used in the benchmark "algorithm" description. + This allows adding new commands by just adding such classes. +
  20. +
+ + + +

Supported tasks/commands

+ +

+Existing tasks can be divided into a few groups: +regular index/search work tasks, report tasks, and control tasks. +

+ +
    + +
  1. + Report tasks: There are a few Report commands for generating reports. + Only task runs that were completed are reported. + (The 'Report tasks' themselves are not measured and not reported.) +
      +
    • + RepAll - all (completed) task runs. +
    • +
    • + RepSumByName - all statistics, aggregated by name. So, if AddDoc was executed 2000 times, + only 1 report line would be created for it, aggregating all those 2000 statistic records. +
    • +
    • + RepSelectByPref   prefixWord - all records for tasks whose name starts with prefixWord. +
    • +
    • + RepSumByPref   prefixWord - all records for tasks whose name starts with prefixWord, + aggregated by their full task name. +
    • +
    • + RepSumByNameRound - all statistics, aggregated by name and by Round. + So, if AddDoc was executed 2000 times in each of 3 rounds, 3 report lines would be created for it, + aggregating all those 2000 statistic records in each round. See more about rounds in the NewRound command description below. +
    • +
    • + RepSumByPrefRound   prefixWord - similar to RepSumByNameRound, + just that only tasks whose name starts with prefixWord are included. +
    • +
    + If needed, additional reports can be added by extending the abstract class ReportTask, and by + manipulating the statistics data in Points and TaskStats. +
  2. + +
  3. Control tasks: A few of the tasks control the benchmark algorithm as a whole: +
      +
    • + ClearStats - clears the entire statistics. + Further reports would only include task runs that would start after this call. +
    • +
    • + NewRound - virtually start a new round of performance test. + Although this command can be placed anywhere, it mostly makes sense at the end of an outermost sequence. +
      This increments a global "round counter". All task runs that would start now would + record the new, updated round counter as their round number. This would appear in reports. + In particular, see RepSumByNameRound above. +
      An additional effect of NewRound, is that numeric and boolean properties defined in the + .properties file as a sequence of values, e.g. merge.factor=mrg.10.100.10.100 would + increment (cyclic) to the next value. + Note: this would also be reflected in the reports, in this case under a column that would be named "mrg". +
    • +
    • + ResetInputs - DocMaker and the various QueryMakers + would reset their counters to start. + The way these Maker interfaces work, each call for makeDocument() + or makeQuery() creates the next document or query + that it "knows" to create. + If that pool is "exhausted", the "maker" starts over again. The ResetInputs command + therefore makes the rounds comparable. + It is therefore useful to invoke ResetInputs together with NewRound. +
    • +
    • + ResetSystemErase - reset all index and input data and call gc. + Does NOT reset statistics. This contains ResetInputs. + All writers/readers are nullified, deleted, closed. + Index is erased. + Directory is erased. + You would have to call CreateIndex once this was called... +
    • +
    • + ResetSystemSoft - reset all index and input data and call gc. + Does NOT reset statistics. This contains ResetInputs. + All writers/readers are nullified, closed. + Index is NOT erased. + Directory is NOT erased. + This is useful for testing performance on an existing index, for instance if the construction of a large index + took a very long time and now you would like to test its search or update performance. +
    • +
    +
  4. + +
  5. + Other existing tasks are quite straightforward and would just be briefly described here. +
      +
    • + CreateIndex and OpenIndex both leave the index open for later update operations. + CloseIndex would close it. +
    • +
    • + OpenReader, similarly, would leave an index reader open for later search operations. + But this has further semantics. + If a Read operation is performed, and an open reader exists, it would be used. + Otherwise, the read operation would open its own reader and close it when the read operation is done. + This allows testing various scenarios - sharing a reader, searching with "cold" reader, with "warmed" reader, etc. + The read operations affected by this are: Warm, + Search, SearchTrav (search and traverse), + and SearchTravRet (search and traverse and retrieve). + Notice that each of the 3 search task types maintains its own queryMaker instance. +
    • +
    +
+ + +

Benchmark properties

+ +

+Properties are read from the .properties file, and +define several parameters of the performance test. +As mentioned above for the NewRound task, +numeric and boolean properties that are defined as a sequence +of values, e.g. merge.factor=mrg.10.100.10.100 +would advance (cyclically) to the next value when NewRound is called, and would also +appear as a named column in the reports (column name would be "mrg" in this example). +

+ +

+Some of the currently defined properties are: +

+ +
    +
  1. + analyzer - full class name for the analyzer to use. + Same analyzer would be used in the entire test. +
  2. + +
  3. + directory - valid values are FSDirectory and RAMDirectory. + This tells which directory to use for the performance test. +
  4. + +
  5. + Index work parameters: + Multi int/boolean values would be iterated with calls to NewRound. + They would also be added as columns in the reports; the first string in the + sequence is the column name. + (Make sure it is no shorter than any value in the sequence). +
      +
    • max.buffered +
      Example: max.buffered=buf.10.10.100.100 - + this would define using maxBufferedDocs of 10 in iterations 0 and 1, + and 100 in iterations 2 and 3. +
    • +
    • + merge.factor - which + merge factor to use. +
    • +
    • + compound - whether the index is + using the compound format or not. Valid values are "true" and "false". +
    • +
    +
+ +

+For additional defined properties see the task*.properties file under conf. +

+ + +

Example input algorithm and the result benchmark report

+

+The following example is in conf/task-sample.alg: +

+# --------------------------------------------------------
+#
+# Sample: what is the effect of doc size on indexing time?
+#
+# There are two parts in this test:
+# - PopulateShort adds 2N documents of length  L
+# - PopulateLong  adds  N documents of length 2L
+# Which one would be faster?
+# The comparison is done twice.
+#
+# --------------------------------------------------------
+
+{
+
+    { "PopulateShort"
+        CreateIndex
+        { AddDoc(4000) > : 20000
+        Optimize
+        CloseIndex
+    >
+
+    ResetSystemErase
+
+    { "PopulateLong"
+        CreateIndex
+        { AddDoc(8000) > : 10000
+        Optimize
+        CloseIndex
+    >
+
+    ResetSystemErase
+
+} : 2
+
+RepSelectByPref Populate
+
+

+

+The output report from running this test is the following: +

+Operation     round cmpnd buf mrg   runCnt   recsPerRun        rec/s  elapsedSec    avgUsedMem    avgTotalMem
+PopulateShort     0  true  10  10        1        20003        106.2      188.36     1,664,232      4,194,304
+PopulateLong -  - 0  true  10  10 -  -   1 -  -   10003 -  -  - 89.6 -  - 111.69 -   2,257,112 -  - 4,194,304
+PopulateShort     0  true  10  10        1        20003        107.5      186.14     2,972,088      4,194,304
+PopulateLong -  - 0  true  10  10 -  -   1 -  -   10003 -  -  - 85.9 -  - 116.42 -   2,980,024 -  - 4,194,304
+
+

+
+
 
+ + Index: src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 0) @@ -0,0 +1,219 @@ +package org.apache.lucene.benchmark.byTask; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.benchmark.byTask.feeds.DocMaker; +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; +import org.apache.lucene.benchmark.byTask.stats.Points; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.FileUtils; + + +/** + * Data maintained by a performance test run. + *

+ * Data includes: + *

    + *
  • Configuration. + *
  • Directory, Writer, Reader. + *
  • Docmaker and a few instances of QueryMaker. + *
  • Analyzer. + *
  • Statistics data which is updated during the run. + *
+ */ +public class PerfRunData { + + private Points points; + + // objects used during performance test run + // directory, analyzer, docMaker - created at startup. + // reader, writer, searcher - maintained by basic tasks. + private Directory directory; + private Analyzer analyzer; + private DocMaker docMaker; + private QueryMaker searchQueryMaker; + private QueryMaker searchTravQueryMaker; + private QueryMaker searchTravRetQueryMaker; + + private IndexReader indexReader; + private IndexWriter indexWriter; + private Config config; + + // constructor + public PerfRunData (Config config) throws Exception { + this.config = config; + // analyzer (default is standard analyzer) + analyzer = (Analyzer) Class.forName(config.get("analyzer", + "org.apache.lucene.analysis.StandardAnalyzer")).newInstance(); + // doc maker + docMaker = (DocMaker) Class.forName(config.get("doc.maker", + "org.apache.lucene.benchmark.byTask.utils.SimpleDocMaker")).newInstance(); + docMaker.setConfig(config); + // query makers + // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately. 
+ Class qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.utils.SimpleQueryMaker")); + searchQueryMaker = (QueryMaker) qmkrClass.newInstance(); + searchQueryMaker.setConfig(config); + searchTravQueryMaker = (QueryMaker) qmkrClass.newInstance(); + searchTravQueryMaker.setConfig(config); + searchTravRetQueryMaker = (QueryMaker) qmkrClass.newInstance(); + searchTravRetQueryMaker.setConfig(config); + // index stuff + reinit(false); + + // statistic points + points = new Points(config); + } + + // clean old stuff, reopen + public void reinit(boolean eraseIndex) throws Exception { + + // cleanup index + if (indexWriter!=null) { + indexWriter.close(); + indexWriter = null; + } + if (indexReader!=null) { + indexReader.close(); + indexReader = null; + } + if (directory!=null) { + directory.close(); + } + + // directory (default is ram-dir). + if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) { + File workDir = new File("work"); + File indexDir = new File(workDir,"index"); + if (eraseIndex && indexDir.exists()) { + FileUtils.fullyDelete(indexDir); + } + indexDir.mkdirs(); + directory = FSDirectory.getDirectory(indexDir, eraseIndex); + } else { + directory = new RAMDirectory(); + } + + // inputs + resetInputs(); + + // release unused stuff + System.runFinalization(); + System.gc(); + } + + /** + * @return Returns the points. + */ + public Points getPoints() { + return points; + } + + /** + * @return Returns the directory. + */ + public Directory getDirectory() { + return directory; + } + + /** + * @param directory The directory to set. + */ + public void setDirectory(Directory directory) { + this.directory = directory; + } + + /** + * @return Returns the indexReader. + */ + public IndexReader getIndexReader() { + return indexReader; + } + + /** + * @param indexReader The indexReader to set. 
+ */ + public void setIndexReader(IndexReader indexReader) { + this.indexReader = indexReader; + } + + /** + * @return Returns the indexWriter. + */ + public IndexWriter getIndexWriter() { + return indexWriter; + } + + /** + * @param indexWriter The indexWriter to set. + */ + public void setIndexWriter(IndexWriter indexWriter) { + this.indexWriter = indexWriter; + } + + /** + * @return Returns the anlyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the docMaker. + */ + public DocMaker getDocMaker() { + return docMaker; + } + + /** + * @return Returns the config. + */ + public Config getConfig() { + return config; + } + + public void resetInputs() { + docMaker.resetInputs(); + searchQueryMaker.resetInputs(); + searchTravQueryMaker.resetInputs(); + searchTravRetQueryMaker.resetInputs(); + } + + /** + * @return Returns the searchQueryMaker. + */ + public QueryMaker getSearchQueryMaker() { + return searchQueryMaker; + } + + public QueryMaker getSearchTravQueryMaker() { + return searchTravQueryMaker; + } + + public QueryMaker getSearchTravRetQueryMaker() { + return searchTravRetQueryMaker; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (revision 0) @@ -0,0 +1,192 @@ +package org.apache.lucene.benchmark.byTask.stats; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.tasks.PerfTask; + +/** + * Statistics for a task run. + *
The same task can run more than once, but, if that task records statistics, + * each run would create its own TaskStats. + */ +public class TaskStats implements Cloneable { + + /** task for which data was collected */ + private PerfTask task; + + /** round in which task run started */ + private int round; + + /** task start time */ + private long start; + + /** task elapsed time. elapsed >= 0 indicates run completion! */ + private long elapsed = -1; + + /** max tot mem during task */ + private long maxTotMem; + + /** max used mem during task */ + private long maxUsedMem; + + /** serial run number of this task run in the perf run */ + private int taskRunNum; + + /** number of other tasks that started to run while this task was still running */ + private int numParallelTasks; + + /** number of work items done by this task. + * For indexing that can be number of docs added. + * For warming that can be number of scanned items, etc. + * For repeating tasks, this is a sum over repetitions. + */ + private int count; + + /** Number of similar tasks aggregated into this record. + * Used when summing up on few runs/instances of similar tasks. + */ + private int numRuns = 1; + + /** + * Create a run data for a task that is starting now. + * To be called from Points. 
+ */ + TaskStats (PerfTask task, int taskRunNum, int round) { + this.task = task; + this.taskRunNum = taskRunNum; + this.round = round; + maxTotMem = Runtime.getRuntime().totalMemory(); + maxUsedMem = maxTotMem - Runtime.getRuntime().freeMemory(); + start = System.currentTimeMillis(); + } + + /** + * mark the end of a task + */ + void markEnd (int numParallelTasks, int count) { + elapsed = System.currentTimeMillis() - start; + long totMem = Runtime.getRuntime().totalMemory(); + if (totMem > maxTotMem) { + maxTotMem = totMem; + } + long usedMem = totMem - Runtime.getRuntime().freeMemory(); + if (usedMem > maxUsedMem) { + maxUsedMem = usedMem; + } + this.numParallelTasks = numParallelTasks; + this.count = count; + } + + /** + * @return the taskRunNum. + */ + public int getTaskRunNum() { + return taskRunNum; + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + StringBuffer res = new StringBuffer(task.getName()); + res.append(" "); + res.append(count); + res.append(" "); + res.append(elapsed); + return res.toString(); + } + + /** + * @return Returns the count. + */ + public int getCount() { + return count; + } + + /** + * @return elapsed time. + */ + public long getElapsed() { + return elapsed; + } + + /** + * @return Returns the maxTotMem. + */ + public long getMaxTotMem() { + return maxTotMem; + } + + /** + * @return Returns the maxUsedMem. + */ + public long getMaxUsedMem() { + return maxUsedMem; + } + + /** + * @return Returns the numParallelTasks. + */ + public int getNumParallelTasks() { + return numParallelTasks; + } + + /** + * @return Returns the task. + */ + public PerfTask getTask() { + return task; + } + + /** + * @return Returns the numRuns. + */ + public int getNumRuns() { + return numRuns; + } + + /** + * Add data from another stat, for aggregation + * @param stat2 the added stat data. 
+ */ + public void add(TaskStats stat2) { + numRuns += stat2.getNumRuns(); + elapsed += stat2.getElapsed(); + maxTotMem += stat2.getMaxTotMem(); + maxUsedMem += stat2.getMaxUsedMem(); + count += stat2.getCount(); + if (round != stat2.round) { + round = -1; // no meaning if agregating tasks of different ruond. + } + } + + /* (non-Javadoc) + * @see java.lang.Object#clone() + */ + protected Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + /** + * @return the round number. + */ + int getRound() { + return round; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/stats/Report.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/Report.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/Report.java (revision 0) @@ -0,0 +1,64 @@ +package org.apache.lucene.benchmark.byTask.stats; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Textual report of current statistics. 
+ */ +public class Report { + + private String text; + private int size; + private int outOf; + private int reported; + + Report (String text, int size, int reported, int outOf) { + this.text = text; + this.size = size; + this.reported = reported; + this.outOf = outOf; + } + + /** + * Returns total number of stats points when this report was created. + */ + public int getOutOf() { + return outOf; + } + + /** + * Returns number of lines in the reoprt. + */ + public int getSize() { + return size; + } + + /** + * Returns the report text. + */ + public String getText() { + return text; + } + + /** + * Returns number of stats points represented in this report. + */ + public int getReported() { + return reported; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/stats/Points.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (revision 0) @@ -0,0 +1,343 @@ +package org.apache.lucene.benchmark.byTask.stats; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashMap; + +import org.apache.lucene.benchmark.byTask.tasks.PerfTask; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.Format; + + +/** + * Test run data points collected as the test proceeds. + */ +public class Points { + + private Config config; + + private static final String newline = System.getProperty("line.separator"); + + // stat points ordered by their start time. + // for now we collect points as TaskStats objects. + // later might optimize to collect only native data. + private ArrayList points = new ArrayList(); + + private int nextTaskRunNum = 0; + + /** + * Get a textual summary of the benchmark results, average from all test runs. + */ + static final String OP = "Operation "; + static final String ROUND = " round"; + static final String RUNCNT = " runCnt"; + static final String RECCNT = " recsPerRun"; + static final String RECSEC = " rec/s"; + static final String ELAPSED = " elapsedSec"; + static final String USEDMEM = " avgUsedMem"; + static final String TOTMEM = " avgTotalMem"; + static final String COLS[] = { + RUNCNT, + RECCNT, + RECSEC, + ELAPSED, + USEDMEM, + TOTMEM + }; + + /** + * Create a Points statistics object. 
+ */ + public Points (Config config) { + this.config = config; + } + + private String tableTitle (String longestOp) { + StringBuffer sb = new StringBuffer(); + sb.append(Format.format(OP,longestOp)); + sb.append(ROUND); + sb.append(config.getColsNamesForValsByRound()); + for (int i = 0; i < COLS.length; i++) { + sb.append(COLS[i]); + } + return sb.toString(); + } + + /** + * Report detailed statistics as a string + * @return the report + */ + public Report reportAll() { + String longestOp = longestOp(points); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longestOp)); + sb.append(newline); + int reported = 0; + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0) { // consider only tasks that ended + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longestOp, stat); + reported++; + if (points.size()>2&& reported%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); + return new Report(reptxt,reported,reported,points.size()); + } + + /** + * Report statistics as a string, aggregate for tasks named the same. 
+ * @return the report + */ + public Report reportSumByName() { + // aggregate by task name + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0) { // consider only tasks that ended + reported++; + String name = stat1.getTask().getName(); + TaskStats stat2 = (TaskStats) p2.get(name); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(name,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + /** + * Report statistics as a string, aggregate for tasks named the same, and from the same round. + * @return the report + */ + public Report reportSumByNameRound() { + // aggregate by task name and round + LinkedHashMap p2 = new LinkedHashMap(); + int reported = 0; + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0) { // consider only tasks that ended + reported++; + String name = stat1.getTask().getName(); + String rname = stat1.getRound()+"."+name; // group by round + TaskStats stat2 = (TaskStats) p2.get(rname); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(rname,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + private String longestOp(Collection c) { + String longest = OP; + for (Iterator it = c.iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0) { // consider only tasks that ended + String name = stat.getTask().getName(); + if (name.length() > longest.length()) { + longest = name; + } + } + } + return longest; 
+ } + + private String taskReportLine(String longestOp, TaskStats stat) { + PerfTask task = stat.getTask(); + StringBuffer sb = new StringBuffer(); + sb.append(Format.format(task.getName(), longestOp)); + String round = (stat.getRound()>=0 ? ""+stat.getRound() : "-"); + sb.append(Format.formatPaddLeft(round, ROUND)); + sb.append(config.getColsValuesForValsByRound(stat.getRound())); + sb.append(Format.format(stat.getNumRuns(), RUNCNT)); + sb.append(Format.format(stat.getCount() / stat.getNumRuns(), RECCNT)); + long elapsed = (stat.getElapsed()>0 ? stat.getElapsed() : 1); // assume at least 1ms + sb.append(Format.format(1,(float) (stat.getCount() * 1000.0 / elapsed), RECSEC)); + sb.append(Format.format(2, (float) stat.getElapsed() / 1000, ELAPSED)); + sb.append(Format.format(0, (float) stat.getMaxUsedMem() / stat.getNumRuns(), USEDMEM)); + sb.append(Format.format(0, (float) stat.getMaxTotMem() / stat.getNumRuns(), TOTMEM)); + return sb.toString(); + } + + public Report reportSumByPrefix(String prefix) { + // aggregate by task name + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + String name = stat1.getTask().getName(); + TaskStats stat2 = (TaskStats) p2.get(name); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(name,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + public Report reportSumByPrefixRound(String prefix) { + // aggregate by task name and by round + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if 
(stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + String name = stat1.getTask().getName(); + String rname = stat1.getRound()+"."+name; // group by round + TaskStats stat2 = (TaskStats) p2.get(rname); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(rname,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + private Report genReportFromList(int reported, LinkedHashMap p2) { + String longetOp = longestOp(p2.values()); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longetOp)); + sb.append(newline); + int lineNum = 0; + for (Iterator it = p2.values().iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longetOp,stat); + lineNum++; + if (p2.size()>2&& lineNum%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" 
: sb.toString()); + return new Report(reptxt,p2.size(),reported,points.size()); + } + + public Report reportSelectByPrefix(String prefix) { + String longestOp = longestOp(points); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longestOp)); + sb.append(newline); + int reported = 0; + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0 && stat.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longestOp,stat); + if (points.size()>2&& reported%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); + return new Report(reptxt,reported,reported, points.size()); + } + + /** + * Mark that a task is starting. + * Create a task stats for it and store it as a point. + * @param task the starting task. + * @return the new task stats created for the starting task. + */ + public synchronized TaskStats markTaskStart (PerfTask task, int round) { + TaskStats stats = new TaskStats(task, nextTaskRunNum(), round); + points.add(stats); + return stats; + } + + // return next task num + private synchronized int nextTaskRunNum() { + return nextTaskRunNum++; + } + + /** + * mark the end of a task + */ + public synchronized void markTaskEnd (TaskStats stats, int count) { + int numParallelTasks = nextTaskRunNum - 1 - stats.getTaskRunNum(); + // note: if the stats were cleared, might be that this stats object is + // no longer in points, but this is just ok. + stats.markEnd(numParallelTasks, count); + } + + /** + * Clear all data, prepare for more tests. 
+ */ + public void clearData() { + points.clear(); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/stats/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/package.html (revision 0) @@ -0,0 +1,5 @@ + + + Statistics maintained when running benchmark tasks. + + Index: src/java/org/apache/lucene/benchmark/byTask/stats/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/package.html (revision 0) @@ -0,0 +1,5 @@ + + + Statistics maintained when running benchmark tasks. + + Index: src/java/org/apache/lucene/benchmark/byTask/stats/Points.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (revision 0) @@ -0,0 +1,343 @@ +package org.apache.lucene.benchmark.byTask.stats; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashMap; + +import org.apache.lucene.benchmark.byTask.tasks.PerfTask; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.Format; + + +/** + * Test run data points collected as the test proceeds. + */ +public class Points { + + private Config config; + + private static final String newline = System.getProperty("line.separator"); + + // stat points ordered by their start time. + // for now we collect points as TaskStats objects. + // later might optimize to collect only native data. + private ArrayList points = new ArrayList(); + + private int nextTaskRunNum = 0; + + /** + * Get a textual summary of the benchmark results, average from all test runs. + */ + static final String OP = "Operation "; + static final String ROUND = " round"; + static final String RUNCNT = " runCnt"; + static final String RECCNT = " recsPerRun"; + static final String RECSEC = " rec/s"; + static final String ELAPSED = " elapsedSec"; + static final String USEDMEM = " avgUsedMem"; + static final String TOTMEM = " avgTotalMem"; + static final String COLS[] = { + RUNCNT, + RECCNT, + RECSEC, + ELAPSED, + USEDMEM, + TOTMEM + }; + + /** + * Create a Points statistics object. 
+ */ + public Points (Config config) { + this.config = config; + } + + private String tableTitle (String longestOp) { + StringBuffer sb = new StringBuffer(); + sb.append(Format.format(OP,longestOp)); + sb.append(ROUND); + sb.append(config.getColsNamesForValsByRound()); + for (int i = 0; i < COLS.length; i++) { + sb.append(COLS[i]); + } + return sb.toString(); + } + + /** + * Report detailed statistics as a string + * @return the report + */ + public Report reportAll() { + String longestOp = longestOp(points); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longestOp)); + sb.append(newline); + int reported = 0; + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0) { // consider only tasks that ended + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longestOp, stat); + reported++; + if (points.size()>2&& reported%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); + return new Report(reptxt,reported,reported,points.size()); + } + + /** + * Report statistics as a string, aggregate for tasks named the same. 
+ * @return the report + */ + public Report reportSumByName() { + // aggregate by task name + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0) { // consider only tasks that ended + reported++; + String name = stat1.getTask().getName(); + TaskStats stat2 = (TaskStats) p2.get(name); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(name,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + /** + * Report statistics as a string, aggregate for tasks named the same, and from the same round. + * @return the report + */ + public Report reportSumByNameRound() { + // aggregate by task name and round + LinkedHashMap p2 = new LinkedHashMap(); + int reported = 0; + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0) { // consider only tasks that ended + reported++; + String name = stat1.getTask().getName(); + String rname = stat1.getRound()+"."+name; // group by round + TaskStats stat2 = (TaskStats) p2.get(rname); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(rname,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + private String longestOp(Collection c) { + String longest = OP; + for (Iterator it = c.iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0) { // consider only tasks that ended + String name = stat.getTask().getName(); + if (name.length() > longest.length()) { + longest = name; + } + } + } + return longest; 
+ } + + private String taskReportLine(String longestOp, TaskStats stat) { + PerfTask task = stat.getTask(); + StringBuffer sb = new StringBuffer(); + sb.append(Format.format(task.getName(), longestOp)); + String round = (stat.getRound()>=0 ? ""+stat.getRound() : "-"); + sb.append(Format.formatPaddLeft(round, ROUND)); + sb.append(config.getColsValuesForValsByRound(stat.getRound())); + sb.append(Format.format(stat.getNumRuns(), RUNCNT)); + sb.append(Format.format(stat.getCount() / stat.getNumRuns(), RECCNT)); + long elapsed = (stat.getElapsed()>0 ? stat.getElapsed() : 1); // assume at least 1ms + sb.append(Format.format(1,(float) (stat.getCount() * 1000.0 / elapsed), RECSEC)); + sb.append(Format.format(2, (float) stat.getElapsed() / 1000, ELAPSED)); + sb.append(Format.format(0, (float) stat.getMaxUsedMem() / stat.getNumRuns(), USEDMEM)); + sb.append(Format.format(0, (float) stat.getMaxTotMem() / stat.getNumRuns(), TOTMEM)); + return sb.toString(); + } + + public Report reportSumByPrefix(String prefix) { + // aggregate by task name + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if (stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + String name = stat1.getTask().getName(); + TaskStats stat2 = (TaskStats) p2.get(name); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(name,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + public Report reportSumByPrefixRound(String prefix) { + // aggregate by task name and by round + int reported = 0; + LinkedHashMap p2 = new LinkedHashMap(); + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat1 = (TaskStats) it.next(); + if 
(stat1.getElapsed()>=0 && stat1.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + String name = stat1.getTask().getName(); + String rname = stat1.getRound()+"."+name; // group by round + TaskStats stat2 = (TaskStats) p2.get(rname); + if (stat2 == null) { + try { + stat2 = (TaskStats) stat1.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + p2.put(rname,stat2); + } else { + stat2.add(stat1); + } + } + } + // now generate report from secondary list p2 + return genReportFromList(reported, p2); + } + + private Report genReportFromList(int reported, LinkedHashMap p2) { + String longetOp = longestOp(p2.values()); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longetOp)); + sb.append(newline); + int lineNum = 0; + for (Iterator it = p2.values().iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longetOp,stat); + lineNum++; + if (p2.size()>2&& lineNum%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" 
: sb.toString()); + return new Report(reptxt,p2.size(),reported,points.size()); + } + + public Report reportSelectByPrefix(String prefix) { + String longestOp = longestOp(points); + boolean first = true; + StringBuffer sb = new StringBuffer(); + sb.append(tableTitle(longestOp)); + sb.append(newline); + int reported = 0; + for (Iterator it = points.iterator(); it.hasNext();) { + TaskStats stat = (TaskStats) it.next(); + if (stat.getElapsed()>=0 && stat.getTask().getName().startsWith(prefix)) { // only ended tasks with proper name + reported++; + if (!first) { + sb.append(newline); + } + first = false; + String line = taskReportLine(longestOp,stat); + if (points.size()>2&& reported%2==0) { + line = line.replaceAll(" "," - "); + } + sb.append(line); + } + } + String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString()); + return new Report(reptxt,reported,reported, points.size()); + } + + /** + * Mark that a task is starting. + * Create a task stats for it and store it as a point. + * @param task the starting task. + * @return the new task stats created for the starting task. + */ + public synchronized TaskStats markTaskStart (PerfTask task, int round) { + TaskStats stats = new TaskStats(task, nextTaskRunNum(), round); + points.add(stats); + return stats; + } + + // return next task num + private synchronized int nextTaskRunNum() { + return nextTaskRunNum++; + } + + /** + * mark the end of a task + */ + public synchronized void markTaskEnd (TaskStats stats, int count) { + int numParallelTasks = nextTaskRunNum - 1 - stats.getTaskRunNum(); + // note: if the stats were cleared, might be that this stats object is + // no longer in points, but this is just ok. + stats.markEnd(numParallelTasks, count); + } + + /** + * Clear all data, prepare for more tests. 
+ */ + public void clearData() { + points.clear(); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/stats/Report.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/Report.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/Report.java (revision 0) @@ -0,0 +1,64 @@ +package org.apache.lucene.benchmark.byTask.stats; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Textual report of current statistics. + */ +public class Report { + + private String text; + private int size; + private int outOf; + private int reported; + + Report (String text, int size, int reported, int outOf) { + this.text = text; + this.size = size; + this.reported = reported; + this.outOf = outOf; + } + + /** + * Returns total number of stats points when this report was created. + */ + public int getOutOf() { + return outOf; + } + + /** + * Returns number of lines in the reoprt. + */ + public int getSize() { + return size; + } + + /** + * Returns the report text. + */ + public String getText() { + return text; + } + + /** + * Returns number of stats points represented in this report. 
+ */ + public int getReported() { + return reported; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (revision 0) @@ -0,0 +1,192 @@ +package org.apache.lucene.benchmark.byTask.stats; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.tasks.PerfTask; + +/** + * Statistics for a task run. + *
The same task can run more than once, but, if that task records statistics, + * each run would create its own TaskStats. + */ +public class TaskStats implements Cloneable { + + /** task for which data was collected */ + private PerfTask task; + + /** round in which task run started */ + private int round; + + /** task start time */ + private long start; + + /** task elapsed time. elapsed >= 0 indicates run completion! */ + private long elapsed = -1; + + /** max tot mem during task */ + private long maxTotMem; + + /** max used mem during task */ + private long maxUsedMem; + + /** serial run number of this task run in the perf run */ + private int taskRunNum; + + /** number of other tasks that started to run while this task was still running */ + private int numParallelTasks; + + /** number of work items done by this task. + * For indexing that can be number of docs added. + * For warming that can be number of scanned items, etc. + * For repeating tasks, this is a sum over repetitions. + */ + private int count; + + /** Number of similar tasks aggregated into this record. + * Used when summing up on few runs/instances of similar tasks. + */ + private int numRuns = 1; + + /** + * Create a run data for a task that is starting now. + * To be called from Points. 
+ */ + TaskStats (PerfTask task, int taskRunNum, int round) { + this.task = task; + this.taskRunNum = taskRunNum; + this.round = round; + maxTotMem = Runtime.getRuntime().totalMemory(); + maxUsedMem = maxTotMem - Runtime.getRuntime().freeMemory(); + start = System.currentTimeMillis(); + } + + /** + * mark the end of a task + */ + void markEnd (int numParallelTasks, int count) { + elapsed = System.currentTimeMillis() - start; + long totMem = Runtime.getRuntime().totalMemory(); + if (totMem > maxTotMem) { + maxTotMem = totMem; + } + long usedMem = totMem - Runtime.getRuntime().freeMemory(); + if (usedMem > maxUsedMem) { + maxUsedMem = usedMem; + } + this.numParallelTasks = numParallelTasks; + this.count = count; + } + + /** + * @return the taskRunNum. + */ + public int getTaskRunNum() { + return taskRunNum; + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + StringBuffer res = new StringBuffer(task.getName()); + res.append(" "); + res.append(count); + res.append(" "); + res.append(elapsed); + return res.toString(); + } + + /** + * @return Returns the count. + */ + public int getCount() { + return count; + } + + /** + * @return elapsed time. + */ + public long getElapsed() { + return elapsed; + } + + /** + * @return Returns the maxTotMem. + */ + public long getMaxTotMem() { + return maxTotMem; + } + + /** + * @return Returns the maxUsedMem. + */ + public long getMaxUsedMem() { + return maxUsedMem; + } + + /** + * @return Returns the numParallelTasks. + */ + public int getNumParallelTasks() { + return numParallelTasks; + } + + /** + * @return Returns the task. + */ + public PerfTask getTask() { + return task; + } + + /** + * @return Returns the numRuns. + */ + public int getNumRuns() { + return numRuns; + } + + /** + * Add data from another stat, for aggregation + * @param stat2 the added stat data. 
+ */ + public void add(TaskStats stat2) { + numRuns += stat2.getNumRuns(); + elapsed += stat2.getElapsed(); + maxTotMem += stat2.getMaxTotMem(); + maxUsedMem += stat2.getMaxUsedMem(); + count += stat2.getCount(); + if (round != stat2.round) { + round = -1; // no meaning if agregating tasks of different ruond. + } + } + + /* (non-Javadoc) + * @see java.lang.Object#clone() + */ + protected Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + /** + * @return the round number. + */ + int getRound() { + return round; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java (revision 0) @@ -0,0 +1,48 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Search and Travrese and Retrieve docs task. 
+ */ +public class SearchTravRetTask extends ReadTask { + + public boolean withRetrieve() { + return true; + } + + public boolean withSearch() { + return true; + } + + public boolean withTraverse() { + return true; + } + + public boolean withWarm() { + return false; + } + + public QueryMaker getQueryMaker() { + return getRunData().getSearchTravQueryMaker(); + } + + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 0) @@ -0,0 +1,79 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.DocMaker; +import org.apache.lucene.document.Document; + + +/** + * Add a document, optionally with of a cetrain size. + * Other side effects: none. + */ +public class AddDocTask extends PerfTask { + + private static int logStep = -1; + + // volatile data passed between setup(), doLogic(), tearDown(). 
+ Document doc = null; + + /* + * (non-Javadoc) + * @see PerfTask#setup() + */ + public void setup() throws Exception { + DocMaker docMaker = getRunData().getDocMaker(); + int docSize = (int) getDoubleParam(); + if (docSize > 0) { + doc = docMaker.makeDocument(docSize); + } else { + doc = docMaker.makeDocument(); + } + } + + /* (non-Javadoc) + * @see PerfTask#tearDown() + */ + public void tearDown() { + DocMaker docMaker = getRunData().getDocMaker(); + log(docMaker.getCount()); + doc = null; + } + + public int doLogic() throws Exception { + getRunData().getIndexWriter().addDocument(doc); + return 1; + } + + private void log (int count) { + if (logStep<0) { + // avoid sync although race possible here + logStep = getRunData().getConfig().get("doc.add.log.step",500); + } + if (logStep>0 && (count%logStep)==0) { + System.out.println("--> processed "+count+" docs"); + } + } + + /* (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#setDoubleParam(double) + */ + public void setDoubleParam(double doubleParam) throws Exception { + this.doubleParam = doubleParam; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all statistics with no aggregations. + * Other side effects: None. + */ +public class RepAllTask extends ReportTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportAll(); + + System.out.println(); + System.out.println("------------> Report All ("+rp.getSize()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all statistics grouped/aggregated by name and round. + * Other side effects: None. + */ +public class RepSumByNameRoundTask extends ReportTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByNameRound(); + + System.out.println(); + System.out.println("------------> Report Sum By (any) Name and Round ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java (revision 0) @@ -0,0 +1,53 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report by-name-prefix statistics aggregated by name. + * Other side effects: None. + */ +public class RepSumByPrefTask extends ReportTask { + + protected String prefix; + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByPrefix(prefix); + + System.out.println(); + System.out.println("------------> Report Sum By Prefix ("+prefix+") ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + + public void setPrefix(String prefix) { + this.prefix = prefix; + } + + /* (non-Javadoc) + * @see PerfTask#toString() + */ + public String toString() { + return super.toString()+" "+prefix; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java (revision 0) @@ -0,0 +1,44 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */



/**
 * Reset all index and input data and call gc, does NOT erase index/dir, does NOT clear statistics.
 * This contains ResetInputs.
 * Other side effects: writers/readers nullified, closed.
 * Index is NOT erased.
 * Directory is NOT erased.
 */
public class ResetSystemSoftTask extends PerfTask {

  /**
   * Re-initialize the run data, passing <code>false</code> to <code>reinit</code>.
   * @return always 0.
   */
  public int doLogic() throws Exception {
    getRunData().reinit(false);
    return 0;
  }

  /*
   * (non-Javadoc)
   * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#shouldNotRecordStats()
   */
  protected boolean shouldNotRecordStats() {
    return true;
  }

}
Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java
===================================================================
--- src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java (revision 0)
+++ src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java (revision 0)
@@ -0,0 +1,38 @@
package org.apache.lucene.benchmark.byTask.tasks;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;

/**
 * Open an index reader.
 * Other side effects: index reader object in perfRunData is set.
 */
public class OpenReaderTask extends PerfTask {

  /**
   * Open a reader on the directory of the run data and register it there.
   * @return always 1.
   * @throws IOException if opening the reader fails.
   */
  public int doLogic() throws IOException {
    Directory dir = getRunData().getDirectory();
    IndexReader reader = IndexReader.open(dir);
    getRunData().setIndexReader(reader);
    return 1;
  }

}
Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java
===================================================================
--- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java (revision 0)
+++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java (revision 0)
@@ -0,0 +1,42 @@
package org.apache.lucene.benchmark.byTask.tasks;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */



/**
 * Reset inputs so that the test run would behave, input wise,
 * as if it just started. This affects e.g. the generation of docs and queries.
 */
public class ResetInputsTask extends PerfTask {

  /**
   * Reset the inputs of the run data.
   * @return always 0.
   */
  public int doLogic() throws Exception {
    getRunData().resetInputs();
    return 0;
  }

  /*
   * (non-Javadoc)
   * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#shouldNotRecordStats()
   */
  protected boolean shouldNotRecordStats() {
    return true;
  }


}
Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java
===================================================================
--- src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java (revision 0)
+++ src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java (revision 0)
@@ -0,0 +1,40 @@
package org.apache.lucene.benchmark.byTask.tasks;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.IndexReader;

/**
 * Close index reader.
 * Other side effects: index reader in perfRunData is nullified.
+ * This would cause read related tasks to reopen their own reader. + */ +public class CloseReaderTask extends PerfTask { + + public int doLogic() throws IOException { + IndexReader reader= getRunData().getIndexReader(); + if (reader!=null) { + reader.close(); + } + getRunData().setIndexReader(null); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 0) @@ -0,0 +1,54 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Open an index writer. + * Other side effects: index writer object in perfRunData is set. 
+ */ +public class OpenIndexTask extends PerfTask { + + public int doLogic() throws IOException { + Directory dir = getRunData().getDirectory(); + Analyzer analyzer = getRunData().getAnalyzer(); + IndexWriter writer = new IndexWriter(dir, analyzer, false); + + Config config = getRunData().getConfig(); + + boolean cmpnd = config.get("compound",true); + int mrgf = config.get("merge.factor",10); + int mxbf = config.get("max.buffered",10); + + // must update params for newly opened writer + writer.setMaxBufferedDocs(mxbf); + writer.setMergeFactor(mrgf); + writer.setUseCompoundFile(cmpnd); // this one redundant? + + getRunData().setIndexWriter(writer); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 0) @@ -0,0 +1,54 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Create an index. + * Other side effects: index writer object in perfRunData is set. + */ +public class CreateIndexTask extends PerfTask { + + public int doLogic() throws IOException { + Directory dir = getRunData().getDirectory(); + Analyzer analyzer = getRunData().getAnalyzer(); + + IndexWriter iw = new IndexWriter(dir, analyzer, true); + + Config config = getRunData().getConfig(); + + boolean cmpnd = config.get("compound",true); + int mrgf = config.get("merge.factor",10); + int mxbf = config.get("max.buffered",10); + + iw.setUseCompoundFile(cmpnd); + iw.setMergeFactor(mrgf); + iw.setMaxBufferedDocs(mxbf); + + getRunData().setIndexWriter(iw); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/package.html (revision 0) @@ -0,0 +1,9 @@ + + + + + + +Extendable benchmark tasks. + + Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all prefix matching statistics grouped/aggregated by name and round. + * Other side effects: None. + */ +public class RepSumByPrefRoundTask extends RepSumByPrefTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByPrefixRound(prefix); + + System.out.println(); + System.out.println("------------> Report sum by Prefix ("+prefix+") and Round ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 0) @@ -0,0 +1,246 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.stats.Points; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; + +/** + * A (abstract) task to be tested for performance. + *
+ * Every performance task extends this class, and provides its own doLogic() method, + * which performs the actual task. + *
+ * Tasks performing some work that should not be measured for the task, can override setup() and/or tearDown() and + * place that work there. + */ +public abstract class PerfTask { + + private PerfRunData runData; + + // properties that all tasks have + private String name; + private int depth = 0; + private int maxDepthLogStart = 0; + protected double doubleParam = 0; + protected String stringParam = null; + protected char charParam = ' '; + + protected static final String NEW_LINE = System.getProperty("line.separator"); + + /** + * Derive the default task name from the simple class name, dropping a trailing "Task". + */ + PerfTask() { + name = getClass().getSimpleName(); + if (name.endsWith("Task")) { + name = name.substring(0,name.length()-4); + } + } + + /** + * Run setup(), doLogic() and tearDown() in that order; doLogic() is bracketed by markTaskStart()/markTaskEnd() only when reportStats is true and shouldNotRecordStats() is false. tearDown() is skipped if doLogic() throws. + * @return number of work items done by this task. + */ + public final int runAndMaybeStats(boolean reportStats) throws Exception { + if (reportStats && depth <= maxDepthLogStart && !shouldNeverLogAtStart()) { + System.out.println("------------> starting task: " + getName()); + } + if (shouldNotRecordStats() || !reportStats) { + setup(); + int count = doLogic(); + tearDown(); + return count; + } + setup(); + Points pnts = runData.getPoints(); + TaskStats ts = pnts.markTaskStart(this,runData.getConfig().getRoundNumber()); + int count = doLogic(); + pnts.markTaskEnd(ts, count); + tearDown(); + return count; + } + + /** + * Perform the task once (ignoring repetitions specification). + * Return number of work items done by this task. + * For indexing that can be number of docs added. + * For warming that can be number of scanned items, etc. + * @return number of work items done by this task. + */ + public abstract int doLogic() throws Exception; + + /** + * @return the task name; if a char, double or string parameter is set (checked in that order) it is appended in parentheses. 
+ */ + public String getName() { + if (charParam==' ' && doubleParam==0 && stringParam==null) return name; + StringBuffer sb = new StringBuffer(name).append('('); + if (charParam!=' ') return sb.append(charParam).append(')').toString(); + if (doubleParam!=0) return sb.append(doubleParam).append(')').toString(); + // must be string + return sb.append(stringParam).append(')').toString(); + } + + /** + * @param name The name to set. + */ + protected void setName(String name) { + this.name = name; + } + + /** + * @return Returns the run data. + */ + public PerfRunData getRunData() { + return runData; + } + + /** + * @param runData The run data to set. + */ + public void setRunData(PerfRunData runData) { + this.runData = runData; + } + + /** + * @return Returns the depth. + */ + public int getDepth() { + return depth; + } + + /** + * @param depth The depth to set. + */ + public void setDepth(int depth, int maxDepthLogStart) { + this.depth = depth; + this.maxDepthLogStart = maxDepthLogStart; + } + + // compute a blank string padding for printing this task indented by its depth + String getPadding () { + char c[] = new char[4*getDepth()]; + for (int i = 0; i < c.length; i++) c[i] = ' '; + return new String(c); + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + String padd = getPadding(); + StringBuffer sb = new StringBuffer(padd); + sb.append(getName()); + return sb.toString(); + } + + /** + * @return Returns the maxDepthLogStart. + */ + int getMaxDepthLogStart() { + return maxDepthLogStart; + } + + /** + * Tasks that should never log at start can override this. + * @return true if this task should never log when it starts. + */ + protected boolean shouldNeverLogAtStart () { + return false; + } + + /** + * Tasks that should not record statistics can override this. + * @return true if this task should never record its statistics. 
+ */ + protected boolean shouldNotRecordStats () { + return false; + } + + /** + * Task setup work that should not be measured for that specific task. + * By default it does nothing, but tasks can implement this, moving work from + * doLogic() to this method. Only the work done in doLogic() is measured for this task. + * Notice that higher level (sequence) tasks containing this task would then + * measure larger time than the sum of their contained tasks. + * @throws Exception + */ + public void setup () throws Exception { + } + + /** + * Task tearDown work that should not be measured for that specific task. + * By default it does nothing, but tasks can implement this, moving work from + * doLogic() to this method. Only the work done in doLogic() is measured for this task. + * Notice that higher level (sequence) tasks containing this task would then + * measure larger time than the sum of their contained tasks. + */ + public void tearDown () throws Exception { + } + + /** + * @return Returns the charParam. + */ + public char getCharParam() { + return charParam; + } + + /** + * Tasks supporting a char parameter should override this method. + * @param p The charParam to set. + * @throws Exception if this task does not support a char Parameter + */ + public void setCharParam(char p) throws Exception { + throw new Exception(getName()+" does not support a char Parameter: "+p); + } + + /** + * @return Returns the doubleParam. + */ + public double getDoubleParam() { + return doubleParam; + } + + /** + * Tasks supporting a double parameter should override this method. + * @param p The doubleParam to set. + * @throws Exception if this task does not support a double Parameter + */ + public void setDoubleParam(double p) throws Exception { + throw new Exception(getName()+" does not support a double Parameter: "+p); + } + + /** + * @return Returns the stringParam. 
+ */ + public String getStringParam() { + return stringParam; + } + + /** + * Tasks supporting a string parameter should override this method. + * @param p The stringParam to set. + * @throws Exception if this task does not support a String Parameter + */ + public void setStringParam(String p) throws Exception { + throw new Exception(getName()+" does not support a String Parameter: "+p); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexWriter; + +/** + * Close index writer. + * Other side effects: index writer object in perfRunData is nullified. 
+ */ +public class CloseIndexTask extends PerfTask { + + public int doLogic() throws IOException { + IndexWriter iw = getRunData().getIndexWriter(); + if (iw!=null) { + iw.close(); + } + getRunData().setIndexWriter(null); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report by-name-prefix statistics with no aggregations. + * Other side effects: None. 
+ */ +public class RepSelectByPrefTask extends RepSumByPrefTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSelectByPrefix(prefix); + + System.out.println(); + System.out.println("------------> Report Select By Prefix ("+prefix+") ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java (revision 0) @@ -0,0 +1,35 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexWriter; + +/** + * Optimize the index. + * Other side effects: none. 
+ */ +public class OptimizeTask extends PerfTask { + + public int doLogic() throws Exception { + IndexWriter iw = getRunData().getIndexWriter(); + iw.optimize(); + //System.out.println("optimize called"); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** + * Increment the counter for properties maintained by Round Number. + * Other side effects: if there are props by round number, log value change. 
+ */ +public class NewRoundTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().getConfig().newRound(); + return 0; + } + + /* (non-Javadoc) + * @see PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Search and Traverse task. NOTE(review): getQueryMaker() returns the run data's SearchTravRet query maker - confirm this is intended rather than a SearchTrav-specific one.
+ */ +public class SearchTravTask extends ReadTask { + + public boolean withRetrieve() { + return false; + } + + public boolean withSearch() { + return true; + } + + public boolean withTraverse() { + return true; + } + + public boolean withWarm() { + return false; + } + + public QueryMaker getQueryMaker() { + return getRunData().getSearchTravRetQueryMaker(); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Clear statistics data. + * Other side effects: None. 
+ */ +public class ClearStatsTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().getPoints().clearData(); + return 0; + } + + /* (non-Javadoc) + * @see PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (revision 0) @@ -0,0 +1,48 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Search task. 
+ */ +public class SearchTask extends ReadTask { + + public boolean withRetrieve() { + return false; + } + + public boolean withSearch() { + return true; + } + + public boolean withTraverse() { + return false; + } + + public boolean withWarm() { + return false; + } + + public QueryMaker getQueryMaker() { + return getRunData().getSearchQueryMaker(); + } + + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java (revision 0) @@ -0,0 +1,48 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Warm reader task. + */ +public class WarmTask extends ReadTask { + + public boolean withRetrieve() { + return false; + } + + public boolean withSearch() { + return false; + } + + public boolean withTraverse() { + return false; + } + + public boolean withWarm() { + return true; + } + + public QueryMaker getQueryMaker() { + return null; // not required for this task. 
+ } + + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all statistics aggregated by name. + * Other side effects: None. 
+ */ +public class RepSumByNameTask extends ReportTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByName(); + + System.out.println(); + System.out.println("------------> Report Sum By (any) Name ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java (revision 0) @@ -0,0 +1,44 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +/** + * Reset all index and input data and call gc, erase index and dir, does NOT clear statistics. + * This contains ResetInputs. + * Other side effects: writers/readers nullified, deleted, closed. + * Index is erased. + * Directory is erased. 
+ */ +public class ResetSystemEraseTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().reinit(true); + return 0; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (revision 0) @@ -0,0 +1,257 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Iterator; + +/** + * Sequence of parallel or sequential tasks. + */ +public class TaskSequence extends PerfTask { + private ArrayList tasks; + private int repetitions = 1; + private boolean parallel; + private TaskSequence parent; + private boolean letChildReport = true; + private int rate = 0; + private boolean perMin = false; // rate, if set, is per second by default. 
+ private String seqName; + + public TaskSequence (TaskSequence parent, boolean parallel) { + setName(parallel ? "Par" : "Seq"); + this.parent = parent; + this.parallel = parallel; + tasks = new ArrayList(); + } + + /** + * @return Returns the parallel. + */ + public boolean isParallel() { + return parallel; + } + + /** + * @return Returns the repetitions. + */ + public int getRepetitions() { + return repetitions; + } + + /** + * @param repetitions The repetitions to set. + */ + public void setRepetitions(int repetitions) { + this.repetitions = repetitions; + updateSeqname(); + } + + /** + * @return Returns the parent. + */ + public TaskSequence getParent() { + return parent; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#doLogic() + */ + public int doLogic() throws Exception { + return ( parallel ? doParallelTasks() : doSerialTasks()); + } + + private int doSerialTasks() throws Exception { + if (rate > 0) { + return doSerialTasksWithRate(); + } + + int count = 0; + for (int k=0; k 0) { + //System.out.println("wait: "+waitMore+" for rate: "+ratePerMin+" (delayStep="+delayStep+")"); + Thread.sleep(waitMore); + } + nextStartTime += delayStep; // this aims at average rate. + count += task.runAndMaybeStats(letChildReport); + } + } + return count; + } + + private int doParallelTasks() throws Exception { + final int count [] = {0}; + Thread t[] = new Thread [repetitions * tasks.size()]; + // prepare threads + int indx = 0; + for (int k=0; k 0) { + startlThreadsWithRate(t); + return; + } + for (int i = 0; i < t.length; i++) { + t[i].start(); + } + } + + // run threads with rate + private void startlThreadsWithRate(Thread[] t) throws InterruptedException { + long delayStep = (perMin ? 
60000 : 1000) /rate; + long nextStartTime = System.currentTimeMillis(); + for (int i = 0; i < t.length; i++) { + long waitMore = nextStartTime - System.currentTimeMillis(); + if (waitMore > 0) { + //System.out.println("thread wait: "+waitMore+" for rate: "+ratePerMin+" (delayStep="+delayStep+")"); + Thread.sleep(waitMore); + } + nextStartTime += delayStep; // this aims at average rate of starting threads. + t[i].start(); + } + } + + public void addTask(PerfTask task) { + tasks.add(task); + task.setDepth(getDepth()+1,getMaxDepthLogStart()); + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + String padd = getPadding(); + StringBuffer sb = new StringBuffer(super.toString()); + sb.append(parallel ? " [" : " {"); + sb.append(NEW_LINE); + for (Iterator it = tasks.iterator(); it.hasNext();) { + PerfTask task = (PerfTask) it.next(); + sb.append(task.toString()); + sb.append(NEW_LINE); + } + sb.append(padd); + sb.append(!letChildReport ? ">" : (parallel ? "]" : "}")); + if (repetitions>1) { + sb.append(" * " + repetitions); + } + if (rate>0) { + sb.append(", rate: " + rate+"/"+(perMin?"min":"sec")); + } + return sb.toString(); + } + + /** + * Execute child tasks in a way that they do not report their time separately. + * In the current implementation, if child tasks have child tasks of their own, those are not affected by this call. + */ + public void setNoChildReport() { + letChildReport = false; + } + + /** + * Returns the rate per minute: how many operations should be performed in a minute. + * If 0 this has no effect. + * @return the rate per min: how many operations should be performed in a minute. + */ + public int getRate() { + return (perMin ? rate : 60*rate); + } + + /** + * @param rate The rate to set; perMin tells whether it is per minute (true) or per second (false). 
+ */ + public void setRate(int rate, boolean perMin) { + this.rate = rate; + this.perMin = perMin; + updateSeqname(); + } + + public void setName(String name) { + super.setName(name); + updateSeqname(); + } + + private void updateSeqname() { + seqName = super.getName(); + if (repetitions>1) { + seqName += "_"+repetitions; + } + if (rate>0) { + seqName += "_" + rate + (perMin?"/min":"/sec"); + } + if (parallel && !seqName.toLowerCase().contains("par")) { + seqName += "_Par"; + } + } + + public String getName() { + return seqName; // override to include more info + } + + /** + * @return Returns the tasks. + */ + public ArrayList getTasks() { + return tasks; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; + + + +/** + * Read index (abstract) task. + * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve() + * methods to configure the actual action. + * Other side effects: none. + */ +public abstract class ReadTask extends PerfTask { + + public int doLogic() throws Exception { + int res = 0; + boolean closeReader = false; + + // open reader or use existing one + IndexReader ir = getRunData().getIndexReader(); + if (ir == null) { + Directory dir = getRunData().getDirectory(); + ir = IndexReader.open(dir); + closeReader = true; + //res++; //this is confusing, comment it out + } + + // optionally warm and add num docs traversed to count + if (withWarm()) { + Document doc = null; + for (int m = 0; m < ir.maxDoc(); m++) { + if (!ir.isDeleted(m)) { + doc = ir.document(m); + res += (doc==null ? 0 : 1); + } + } + } + + if (withSearch()) { + res++; + IndexSearcher searcher = new IndexSearcher(ir); + QueryMaker queryMaker = getQueryMaker(); + Query q = queryMaker.makeQuery(); + Hits hits = searcher.search(q); + //System.out.println("searched: "+q); + + if (withTraverse()) { + Document doc = null; + if (hits != null && hits.length() > 0) { + for (int m = 0; m < hits.length(); m++) { + int id = hits.id(m); + res++; + + if (withRetrieve()) { + doc = ir.document(id); + res += (doc==null ? 0 : 1); + } + } + } + } + + searcher.close(); + } + + if (closeReader) { + ir.close(); + } + return res; + } + + /** + * Return query maker used for this task. + */ + public abstract QueryMaker getQueryMaker(); + + /** + * Return true if search should be performed. 
+ */ + public abstract boolean withSearch (); + + /** + * Return true if warming should be performed. + */ + public abstract boolean withWarm (); + + /** + * Return true if, with search, results should be traversed. + */ + public abstract boolean withTraverse (); + + /** + * Return true if, with search & results traversing, docs should be retrieved. + */ + public abstract boolean withRetrieve (); + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Report (abstract) task - all report tasks extend this task. 
+ */ +public abstract class ReportTask extends PerfTask { + + /* (non-Javadoc) + * @see PerfTask#shouldNeverLogAtStart() + */ + protected boolean shouldNeverLogAtStart() { + return true; + } + + /* (non-Javadoc) + * @see PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 0) @@ -0,0 +1,79 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.DocMaker; +import org.apache.lucene.document.Document; + + +/** + * Add a document, optionally of a certain size. + * Other side effects: none. + */ +public class AddDocTask extends PerfTask { + + private static int logStep = -1; + + // volatile data passed between setup(), doLogic(), tearDown().
+ Document doc = null; + + /* + * (non-Javadoc) + * @see PerfTask#setup() + */ + public void setup() throws Exception { + DocMaker docMaker = getRunData().getDocMaker(); + int docSize = (int) getDoubleParam(); + if (docSize > 0) { + doc = docMaker.makeDocument(docSize); + } else { + doc = docMaker.makeDocument(); + } + } + + /* (non-Javadoc) + * @see PerfTask#tearDown() + */ + public void tearDown() { + DocMaker docMaker = getRunData().getDocMaker(); + log(docMaker.getCount()); + doc = null; + } + + public int doLogic() throws Exception { + getRunData().getIndexWriter().addDocument(doc); + return 1; + } + + private void log (int count) { + if (logStep<0) { + // avoid sync although race possible here + logStep = getRunData().getConfig().get("doc.add.log.step",500); + } + if (logStep>0 && (count%logStep)==0) { + System.out.println("--> processed "+count+" docs"); + } + } + + /* (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#setDoubleParam(double) + */ + public void setDoubleParam(double doubleParam) throws Exception { + this.doubleParam = doubleParam; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ClearStatsTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Clear statistics data. + * Other side effects: None. + */ +public class ClearStatsTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().getPoints().clearData(); + return 0; + } + + /* (non-Javadoc) + * @see PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexWriter; + +/** + * Close index writer. + * Other side effects: index writer object in perfRunData is nullified. + */ +public class CloseIndexTask extends PerfTask { + + public int doLogic() throws IOException { + IndexWriter iw = getRunData().getIndexWriter(); + if (iw!=null) { + iw.close(); + } + getRunData().setIndexWriter(null); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; + +/** + * Close index reader. + * Other side effects: index reader in perfRunData is nullified. + * This would cause read related tasks to reopen their own reader. 
+ */ +public class CloseReaderTask extends PerfTask { + + public int doLogic() throws IOException { + IndexReader reader= getRunData().getIndexReader(); + if (reader!=null) { + reader.close(); + } + getRunData().setIndexReader(null); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 0) @@ -0,0 +1,54 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Create an index. + * Other side effects: index writer object in perfRunData is set. 
+ */ +public class CreateIndexTask extends PerfTask { + + public int doLogic() throws IOException { + Directory dir = getRunData().getDirectory(); + Analyzer analyzer = getRunData().getAnalyzer(); + + IndexWriter iw = new IndexWriter(dir, analyzer, true); + + Config config = getRunData().getConfig(); + + boolean cmpnd = config.get("compound",true); + int mrgf = config.get("merge.factor",10); + int mxbf = config.get("max.buffered",10); + + iw.setUseCompoundFile(cmpnd); + iw.setMergeFactor(mrgf); + iw.setMaxBufferedDocs(mxbf); + + getRunData().setIndexWriter(iw); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/NewRoundTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** + * Increment the counter for properties maintained by Round Number. + * Other side effects: if there are props by round number, log value change. 
+ */ +public class NewRoundTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().getConfig().newRound(); + return 0; + } + + /* (non-Javadoc) + * @see PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 0) @@ -0,0 +1,54 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.benchmark.byTask.utils.Config; + + +/** + * Open an index writer. + * Other side effects: index writer object in perfRunData is set. 
+ */ +public class OpenIndexTask extends PerfTask { + + public int doLogic() throws IOException { + Directory dir = getRunData().getDirectory(); + Analyzer analyzer = getRunData().getAnalyzer(); + IndexWriter writer = new IndexWriter(dir, analyzer, false); + + Config config = getRunData().getConfig(); + + boolean cmpnd = config.get("compound",true); + int mrgf = config.get("merge.factor",10); + int mxbf = config.get("max.buffered",10); + + // must update params for newly opened writer + writer.setMaxBufferedDocs(mxbf); + writer.setMergeFactor(mrgf); + writer.setUseCompoundFile(cmpnd); // this one redundant? + + getRunData().setIndexWriter(writer); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.store.Directory; + +/** + * Open an index reader. + * Other side effects: index reader object in perfRunData is set. + */ +public class OpenReaderTask extends PerfTask { + + public int doLogic() throws IOException { + Directory dir = getRunData().getDirectory(); + IndexReader reader = IndexReader.open(dir); + getRunData().setIndexReader(reader); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/OptimizeTask.java (revision 0) @@ -0,0 +1,35 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexWriter; + +/** + * Optimize the index. + * Other side effects: none.
+ */ +public class OptimizeTask extends PerfTask { + + public int doLogic() throws Exception { + IndexWriter iw = getRunData().getIndexWriter(); + iw.optimize(); + //System.out.println("optimize called"); + return 1; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/package.html =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/package.html (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/package.html (revision 0) @@ -0,0 +1,9 @@ + + + + + + +Extendable benchmark tasks. + + Index: src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 0) @@ -0,0 +1,246 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.stats.Points; +import org.apache.lucene.benchmark.byTask.stats.TaskStats; + +/** + * A (abstract) task to be tested for performance. + *
+ * Every performance task extends this class, and provides its own doLogic() method, + * which performs the actual task. + *
+ * Tasks performing some work that should not be measured for the task, can override setup() and/or tearDown() and + * place that work there. + */ +public abstract class PerfTask { + + private PerfRunData runData; + + // properties that all tasks have + private String name; + private int depth = 0; + private int maxDepthLogStart = 0; + protected double doubleParam = 0; + protected String stringParam = null; + protected char charParam = ' '; + + protected static final String NEW_LINE = System.getProperty("line.separator"); + + /** + * Default the task name to the simple class name, without a trailing "Task" suffix. + */ + PerfTask() { + name = getClass().getSimpleName(); + if (name.endsWith("Task")) { + name = name.substring(0,name.length()-4); + } + } + + /** + * Run setup(), doLogic() and tearDown(); statistics are recorded around doLogic() only, and only if reportStats is true and shouldNotRecordStats() is false. + * @return number of work items done by this task. + */ + public final int runAndMaybeStats(boolean reportStats) throws Exception { + if (reportStats && depth <= maxDepthLogStart && !shouldNeverLogAtStart()) { + System.out.println("------------> starting task: " + getName()); + } + if (shouldNotRecordStats() || !reportStats) { + setup(); + int count = doLogic(); + tearDown(); + return count; + } + setup(); + Points pnts = runData.getPoints(); + TaskStats ts = pnts.markTaskStart(this,runData.getConfig().getRoundNumber()); + int count = doLogic(); + pnts.markTaskEnd(ts, count); + tearDown(); + return count; + } + + /** + * Perform the task once (ignoring repetitions specification) + * Return number of work items done by this task. + * For indexing that can be number of docs added. + * For warming that can be number of scanned items, etc. + * @return number of work items done by this task. + */ + public abstract int doLogic() throws Exception; + + /** + * @return Returns the name.
+ */ + public String getName() { + if (charParam==' ' && doubleParam==0 && stringParam==null) return name; + StringBuffer sb = new StringBuffer(name).append('('); + if (charParam!=' ') return sb.append(charParam).append(')').toString(); + if (doubleParam!=0) return sb.append(doubleParam).append(')').toString(); + // must be string + return sb.append(stringParam).append(')').toString(); + } + + /** + * @param name The name to set. + */ + protected void setName(String name) { + this.name = name; + } + + /** + * @return Returns the run data. + */ + public PerfRunData getRunData() { + return runData; + } + + /** + * @param runData The run data to set. + */ + public void setRunData(PerfRunData runData) { + this.runData = runData; + } + + /** + * @return Returns the depth. + */ + public int getDepth() { + return depth; + } + + /** + * @param depth The depth to set. + */ + public void setDepth(int depth, int maxDepthLogStart) { + this.depth = depth; + this.maxDepthLogStart = maxDepthLogStart; + } + + // compute a blank string padding for printing this task indented by its depth + String getPadding () { + char c[] = new char[4*getDepth()]; + for (int i = 0; i < c.length; i++) c[i] = ' '; + return new String(c); + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + String padd = getPadding(); + StringBuffer sb = new StringBuffer(padd); + sb.append(getName()); + return sb.toString(); + } + + /** + * @return Returns the maxDepthLogStart. + */ + int getMaxDepthLogStart() { + return maxDepthLogStart; + } + + /** + * Tasks that should never log at start can override this. + * @return true if this task should never log when it starts. + */ + protected boolean shouldNeverLogAtStart () { + return false; + } + + /** + * Tasks that should not record statistics can override this. + * @return true if this task should never record its statistics.
+ */ + protected boolean shouldNotRecordStats () { + return false; + } + + /** + * Task setup work that should not be measured for that specific task. + * By default it does nothing, but tasks can implement this, moving work from + * doLogic() to this method. Only the work done in doLogic() is measured for this task. + * Notice that higher level (sequence) tasks containing this task would then + * measure larger time than the sum of their contained tasks. + * @throws Exception + */ + public void setup () throws Exception { + } + + /** + * Task tearDown work that should not be measured for that specific task. + * By default it does nothing, but tasks can implement this, moving work from + * doLogic() to this method. Only the work done in doLogic() is measured for this task. + * Notice that higher level (sequence) tasks containing this task would then + * measure larger time than the sum of their contained tasks. + */ + public void tearDown () throws Exception { + } + + /** + * @return Returns the charParam. + */ + public char getCharParam() { + return charParam; + } + + /** + * Tasks supporting a char parameter should override this method. + * @param p The charParam to set. + * @throws Exception if this task does not support a char Parameter + */ + public void setCharParam(char p) throws Exception { + throw new Exception(getName()+" does not support a char Parameter: "+p); + } + + /** + * @return Returns the doubleParam. + */ + public double getDoubleParam() { + return doubleParam; + } + + /** + * Tasks supporting a double parameter should override this method. + * @param p The doubleParam to set. + * @throws Exception if this task does not support a double Parameter + */ + public void setDoubleParam(double p) throws Exception { + throw new Exception(getName()+" does not support a double Parameter: "+p); + } + + /** + * @return Returns the stringParam.
+ */ + public String getStringParam() { + return stringParam; + } + + /** + * Tasks supporting a string parameter should override this method. + * @param p The stringParam to set. + * @throws Exception if this task does not support a String Parameter + */ + public void setStringParam(String p) throws Exception { + throw new Exception(getName()+" does not support a String Parameter: "+p); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; + + + +/** + * Read index (abstract) task.
+ * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve() + * methods to configure the actual action. + * Other side effects: none. + */ +public abstract class ReadTask extends PerfTask { + + public int doLogic() throws Exception { + int res = 0; + boolean closeReader = false; + + // open reader or use existing one + IndexReader ir = getRunData().getIndexReader(); + if (ir == null) { + Directory dir = getRunData().getDirectory(); + ir = IndexReader.open(dir); + closeReader = true; + //res++; //this is confusing, comment it out + } + + // optionally warm and add num docs traversed to count + if (withWarm()) { + Document doc = null; + for (int m = 0; m < ir.maxDoc(); m++) { + if (!ir.isDeleted(m)) { + doc = ir.document(m); + res += (doc==null ? 0 : 1); + } + } + } + + if (withSearch()) { + res++; + IndexSearcher searcher = new IndexSearcher(ir); + QueryMaker queryMaker = getQueryMaker(); + Query q = queryMaker.makeQuery(); + Hits hits = searcher.search(q); + //System.out.println("searched: "+q); + + if (withTraverse()) { + Document doc = null; + if (hits != null && hits.length() > 0) { + for (int m = 0; m < hits.length(); m++) { + int id = hits.id(m); + res++; + + if (withRetrieve()) { + doc = ir.document(id); + res += (doc==null ? 0 : 1); + } + } + } + } + + searcher.close(); + } + + if (closeReader) { + ir.close(); + } + return res; + } + + /** + * Return query maker used for this task. + */ + public abstract QueryMaker getQueryMaker(); + + /** + * Return true if search should be performed. + */ + public abstract boolean withSearch (); + + /** + * Return true if warming should be performed. + */ + public abstract boolean withWarm (); + + /** + * Return true if, with search, results should be traversed. + */ + public abstract boolean withTraverse (); + + /** + * Return true if, with search & results traversing, docs should be retrieved. 
+ */ + public abstract boolean withRetrieve (); + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepAllTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all statistics with no aggregations. + * Other side effects: None. 
+ */ +public class RepAllTask extends ReportTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportAll(); + + System.out.println(); + System.out.println("------------> Report All ("+rp.getSize()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Report (abstract) task - all report tasks extend this task. 
+ */ +public abstract class ReportTask extends PerfTask { + + /* (non-Javadoc) + * @see PerfTask#shouldNeverLogAtStart() + */ + protected boolean shouldNeverLogAtStart() { + return true; + } + + /* (non-Javadoc) + * @see PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSelectByPrefTask.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report by-name-prefix statistics with no aggregations. + * Other side effects: None. 
+ */ +public class RepSelectByPrefTask extends RepSumByPrefTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSelectByPrefix(prefix); + + System.out.println(); + System.out.println("------------> Report Select By Prefix ("+prefix+") ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameRoundTask.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all statistics grouped/aggregated by name and round. + * Other side effects: None. 
+ */ +public class RepSumByNameRoundTask extends ReportTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByNameRound(); + + System.out.println(); + System.out.println("------------> Report Sum By (any) Name and Round ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByNameTask.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all statistics aggregated by name. + * Other side effects: None. 
+ */ +public class RepSumByNameTask extends ReportTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByName(); + + System.out.println(); + System.out.println("------------> Report Sum By (any) Name ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefRoundTask.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report all prefix matching statistics grouped/aggregated by name and round. + * Other side effects: None. 
+ */ +public class RepSumByPrefRoundTask extends RepSumByPrefTask { + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByPrefixRound(prefix); + + System.out.println(); + System.out.println("------------> Report sum by Prefix ("+prefix+") and Round ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/RepSumByPrefTask.java (revision 0) @@ -0,0 +1,53 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.stats.Report; + +/** + * Report by-name-prefix statistics aggregated by name. + * Other side effects: None. 
+ */ +public class RepSumByPrefTask extends ReportTask { + + protected String prefix; + + public int doLogic() throws Exception { + Report rp = getRunData().getPoints().reportSumByPrefix(prefix); + + System.out.println(); + System.out.println("------------> Report Sum By Prefix ("+prefix+") ("+ + rp.getSize()+" about "+rp.getReported()+" out of "+rp.getOutOf()+")"); + System.out.println(rp.getText()); + System.out.println(); + + return 0; + } + + public void setPrefix(String prefix) { + this.prefix = prefix; + } + + /* (non-Javadoc) + * @see PerfTask#toString() + */ + public String toString() { + return super.toString()+" "+prefix; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetInputsTask.java (revision 0) @@ -0,0 +1,42 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +/** + * Reset inputs so that the test run would behave, input wise, + * as if it just started. This affects e.g. 
the generation of docs and queries. + */ +public class ResetInputsTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().resetInputs(); + return 0; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } + + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemEraseTask.java (revision 0) @@ -0,0 +1,44 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +/** + * Reset all index and input data and call gc, erase index and dir, does NOT clear statistics. + * This contains ResetInputs. + * Other side effects: writers/readers nullified, deleted, closed. + * Index is erased. + * Directory is erased.
+ */ +public class ResetSystemEraseTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().reinit(true); + return 0; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/ResetSystemSoftTask.java (revision 0) @@ -0,0 +1,44 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +/** + * Reset all index and input data and call gc, does NOT erase index/dir, does NOT clear statistics. + * This contains ResetInputs. + * Other side effects: writers/readers nullified, closed. + * Index is NOT erased. + * Directory is NOT erased.
+ */ +public class ResetSystemSoftTask extends PerfTask { + + public int doLogic() throws Exception { + getRunData().reinit(false); + return 0; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#shouldNotRecordStats() + */ + protected boolean shouldNotRecordStats() { + return true; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (revision 0) @@ -0,0 +1,48 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Search task. 
+ */ +public class SearchTask extends ReadTask { + + public boolean withRetrieve() { + return false; + } + + public boolean withSearch() { + return true; + } + + public boolean withTraverse() { + return false; + } + + public boolean withWarm() { + return false; + } + + public QueryMaker getQueryMaker() { + return getRunData().getSearchQueryMaker(); + } + + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetTask.java (revision 0) @@ -0,0 +1,48 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Search and Traverse and Retrieve docs task.
+ */ +public class SearchTravRetTask extends ReadTask { + + public boolean withRetrieve() { + return true; + } + + public boolean withSearch() { + return true; + } + + public boolean withTraverse() { + return true; + } + + public boolean withWarm() { + return false; + } + + public QueryMaker getQueryMaker() { + return getRunData().getSearchTravRetQueryMaker(); /* the maker named for this task: search, traverse and retrieve */ + } + + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravTask.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Search and Traverse task.
+ */ +public class SearchTravTask extends ReadTask { + + public boolean withRetrieve() { + return false; + } + + public boolean withSearch() { + return true; + } + + public boolean withTraverse() { + return true; + } + + public boolean withWarm() { + return false; + } + + public QueryMaker getQueryMaker() { + return getRunData().getSearchTravQueryMaker(); /* the maker named for this task: search and traverse */ + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (revision 0) @@ -0,0 +1,257 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Iterator; + +/** + * Sequence of parallel or sequential tasks. + */ +public class TaskSequence extends PerfTask { + private ArrayList tasks; + private int repetitions = 1; + private boolean parallel; + private TaskSequence parent; + private boolean letChildReport = true; + private int rate = 0; + private boolean perMin = false; // rate, if set, is, by default, be sec.
+ private String seqName; + + public TaskSequence (TaskSequence parent, boolean parallel) { + setName(parallel ? "Par" : "Seq"); + this.parent = parent; + this.parallel = parallel; + tasks = new ArrayList(); + } + + /** + * @return Returns the parallel. + */ + public boolean isParallel() { + return parallel; + } + + /** + * @return Returns the repetitions. + */ + public int getRepetitions() { + return repetitions; + } + + /** + * @param repetitions The repetitions to set. + */ + public void setRepetitions(int repetitions) { + this.repetitions = repetitions; + updateSeqname(); + } + + /** + * @return Returns the parent. + */ + public TaskSequence getParent() { + return parent; + } + + /* + * (non-Javadoc) + * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#doLogic() + */ + public int doLogic() throws Exception { + return ( parallel ? doParallelTasks() : doSerialTasks()); + } + + private int doSerialTasks() throws Exception { + if (rate > 0) { + return doSerialTasksWithRate(); + } + + int count = 0; + for (int k=0; k 0) { + //System.out.println("wait: "+waitMore+" for rate: "+ratePerMin+" (delayStep="+delayStep+")"); + Thread.sleep(waitMore); + } + nextStartTime += delayStep; // this aims at avarage rate. + count += task.runAndMaybeStats(letChildReport); + } + } + return count; + } + + private int doParallelTasks() throws Exception { + final int count [] = {0}; + Thread t[] = new Thread [repetitions * tasks.size()]; + // prepare threads + int indx = 0; + for (int k=0; k 0) { + startlThreadsWithRate(t); + return; + } + for (int i = 0; i < t.length; i++) { + t[i].start(); + } + } + + // run threadsm with rate + private void startlThreadsWithRate(Thread[] t) throws InterruptedException { + long delayStep = (perMin ? 
60000 : 1000) /rate; + long nextStartTime = System.currentTimeMillis(); + for (int i = 0; i < t.length; i++) { + long waitMore = nextStartTime - System.currentTimeMillis(); + if (waitMore > 0) { + //System.out.println("thread wait: "+waitMore+" for rate: "+ratePerMin+" (delayStep="+delayStep+")"); + Thread.sleep(waitMore); + } + nextStartTime += delayStep; // this aims at avarage rate of starting threads. + t[i].start(); + } + } + + public void addTask(PerfTask task) { + tasks.add(task); + task.setDepth(getDepth()+1,getMaxDepthLogStart()); + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + String padd = getPadding(); + StringBuffer sb = new StringBuffer(super.toString()); + sb.append(parallel ? " [" : " {"); + sb.append(NEW_LINE); + for (Iterator it = tasks.iterator(); it.hasNext();) { + PerfTask task = (PerfTask) it.next(); + sb.append(task.toString()); + sb.append(NEW_LINE); + } + sb.append(padd); + sb.append(!letChildReport ? ">" : (parallel ? "]" : "}")); + if (repetitions>1) { + sb.append(" * " + repetitions); + } + if (rate>0) { + sb.append(", rate: " + rate+"/"+(perMin?"min":"sec")); + } + return sb.toString(); + } + + /** + * Execute child tasks in a way that they do not reprt their time separately. + * Current implementation if child tasks has child tasks of their own, those are not affected by this call. + */ + public void setNoChildReport() { + letChildReport = false; + } + + /** + * Returns the rate per minute: how many operations should be performed in a minute. + * If 0 this has no effect. + * @return the rate per min: how many operations should be performed in a minute. + */ + public int getRate() { + return (perMin ? rate : 60*rate); + } + + /** + * @param rate The rate to set. 
+ */ + public void setRate(int rate, boolean perMin) { + this.rate = rate; + this.perMin = perMin; + updateSeqname(); + } + + public void setName(String name) { + super.setName(name); + updateSeqname(); + } + + private void updateSeqname() { + seqName = super.getName(); + if (repetitions>1) { + seqName += "_"+repetitions; + } + if (rate>0) { + seqName += "_" + rate + (perMin?"/min":"/sec"); + } + if (parallel && !seqName.toLowerCase().contains("par")) { + seqName += "_Par"; + } + } + + public String getName() { + return seqName; // overide to include more info + } + + /** + * @return Returns the tasks. + */ + public ArrayList getTasks() { + return tasks; + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/tasks/WarmTask.java (revision 0) @@ -0,0 +1,48 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; + +/** + * Warm reader task. 
+ */ +public class WarmTask extends ReadTask { + + public boolean withRetrieve() { + return false; + } + + public boolean withSearch() { + return false; + } + + public boolean withTraverse() { + return false; + } + + public boolean withWarm() { + return true; + } + + public QueryMaker getQueryMaker() { + return null; // not required for this task. + } + + +} Index: src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java (revision 0) @@ -0,0 +1,54 @@ +package org.apache.lucene.benchmark.byTask.utils; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; + +/** + * File utils + */ +public class FileUtils { + + /** + * Delete files and directories, even if non-empty. 
+ * + * @param dir file or directory + * @return true on success, false if no or part of files have been deleted + * @throws java.io.IOException + */ + public static boolean fullyDelete(File dir) throws IOException { + if (dir == null || !dir.exists()) return false; + File contents[] = dir.listFiles(); + if (contents != null) { + for (int i = 0; i < contents.length; i++) { + if (contents[i].isFile()) { + if (!contents[i].delete()) { + return false; + } + } else { + if (!fullyDelete(contents[i])) { + return false; + } + } + } + } + return dir.delete(); + } + +} Index: src/java/org/apache/lucene/benchmark/byTask/utils/Config.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (revision 0) @@ -0,0 +1,239 @@ +package org.apache.lucene.benchmark.byTask.utils; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Properties; +import java.util.StringTokenizer; + +/** + * Perf run configuration properties. + * Numeric property containing ".", e.g.
"buf.10.100.5" is interpreted + * as a column name ("buf") followed by an array of numeric values. It is extracted once, on first use, and + * a round number is maintained to return the appropriate value. + */ +public class Config { + + private int roundNumber = 0; + private Properties props; + private HashMap valByRound = new HashMap(); + private HashMap colForValByRound = new HashMap(); + + public Config (Properties props) { + this.props = props; + } + + /** + * Return a string property. + * @param name name of property. + * @param dflt default value. + * @return a string property. + */ + public String get (String name, String dflt) { + return props.getProperty(name,dflt); + } + + /** + * Return an int property. + * If the property contains ".", e.g. "buf.10.100.5", the first token is taken as the column name and the rest is interpreted + * as array of ints. It is extracted once, on first call + * to get() it, and a by-round-value is returned. + * @param name name of property + * @param dflt default value + * @return an int property. + */ + public int get (String name, int dflt) { + // use value by round if already parsed + int vals[] = (int[]) valByRound.get(name); + if (vals != null) { + return vals[roundNumber % vals.length]; + } + // done if not by round + String sval = props.getProperty(name,""+dflt); + if (sval.indexOf(".")<0) { + return Integer.parseInt(sval); + } + // first time this prop is extracted by round + int k = sval.indexOf("."); + String colName = sval.substring(0,k); + sval = sval.substring(k+1); + colForValByRound.put(name,colName); + vals = propToIntArray(sval); + valByRound.put(name,vals); + return vals[roundNumber % vals.length]; + } + + /** + * Return a boolean property. + * If the property contains ".", e.g. "cmp.true.true.false", the first token is taken as the column name and the rest is interpreted + * as array of booleans. It is extracted once, on first call + * to get() it, and a by-round-value is returned. + * @param name name of property + * @param dflt default value + * @return a boolean property.
+ */ + public boolean get (String name, boolean dflt) { + // use value by round if already parsed + boolean vals[] = (boolean[]) valByRound.get(name); + if (vals != null) { + return vals[roundNumber % vals.length]; + } + // done if not by round + String sval = props.getProperty(name,""+dflt); + if (sval.indexOf(".")<0) { + return Boolean.parseBoolean(sval); + } + // first time this prop is extracted by round + int k = sval.indexOf("."); + String colName = sval.substring(0,k); + sval = sval.substring(k+1); + colForValByRound.put(name,colName); + vals = propToBooleanArray(sval); + valByRound.put(name,vals); + return vals[roundNumber % vals.length]; + } + + /** + * Increment the round number, for config values that are extracted by round number. + * @return the new round number. + */ + public int newRound () { + roundNumber++; + + // log changes in values + if (valByRound.size()>0) { + StringBuffer sb = new StringBuffer("--> Round ").append(roundNumber-1).append("-->").append(roundNumber).append(": "); + for (Iterator iter = valByRound.keySet().iterator(); iter.hasNext();) { + String name = (String) iter.next(); + Object a = valByRound.get(name); + if (a instanceof int[]) { + int ai[] = (int[]) a; + int n1 = (roundNumber-1)%ai.length; + int n2 = roundNumber%ai.length; + sb.append(" ").append(name).append(":").append(ai[n1]).append("-->").append(ai[n2]); + } else { + boolean ab[] = (boolean[]) a; + int n1 = (roundNumber-1)%ab.length; + int n2 = roundNumber%ab.length; + sb.append(" ").append(name).append(":").append(ab[n1]).append("-->").append(ab[n2]); + } + } + System.out.println(); + System.out.println(sb.toString()); + System.out.println(); + } + + return roundNumber; + } + + // extract properties to array, e.g. for "10.100.5" return int[]{10,100,5}. 
+ private int[] propToIntArray (String s) { + if (!s.contains(".")) { + return new int [] { Integer.parseInt(s) }; + } + + ArrayList a = new ArrayList(); + StringTokenizer st = new StringTokenizer(s,"."); + while (st.hasMoreTokens()) { + String t = st.nextToken(); + a.add(new Integer(t)); + } + int res[] = new int[a.size()]; + for (int i=0; i' : + currSequence.setNoChildReport(); + case '}' : + case ']' : + // end sequence + colonOk = true; prevTask = currSequence; + currSequence = currSequence.getParent(); + break; + + } //switch(c) + break; + + } //switch(stok.ttype) + + } + + if (sequence != currSequence) { + throw new Exception("Unmatched sequences"); + } + + // remove redundant top level enclosing sequences + while (sequence.getRepetitions()==1 && sequence.getRate()==0) { + ArrayList t = sequence.getTasks(); + if (t!=null && t.size()==1) { + PerfTask p = (PerfTask) t.get(0); + if (p instanceof TaskSequence) { + sequence = (TaskSequence) p; + continue; + } + } + break; + } + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + String newline = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(); + sb.append(sequence.toString()); + sb.append(newline); + return sb.toString(); + } + + /** + * Execute this algorithm + * @throws Exception + */ + public void execute() throws Exception { + sequence.doLogic(); + } + + +} + Index: src/java/org/apache/lucene/benchmark/byTask/utils/Format.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (revision 0) +++ src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (revision 0) @@ -0,0 +1,71 @@ +package org.apache.lucene.benchmark.byTask.utils; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/**
 * Formatting helpers that fit numbers and strings into fixed-width report
 * columns. The width of a column is taken from the length of the column
 * title passed as <code>col</code>.
 * <p>
 * Values shorter than the column are padded with spaces; values longer than
 * the column are cut to the column width (left-padded values keep their
 * rightmost characters, right-padded values keep their leftmost characters).
 * Padding is generated on demand, so any column width is supported.
 * <p>
 * NOTE(review): the shared <code>NumberFormat</code> instances are not
 * synchronized by the JDK; confirm callers do not format concurrently.
 */
public class Format {

  // One formatter per supported number of fraction digits (index = digits).
  private static NumberFormat numFormat [] = {
    NumberFormat.getInstance(),
    NumberFormat.getInstance(),
    NumberFormat.getInstance(),
  };

  static {
    numFormat[0].setMaximumFractionDigits(0);
    numFormat[0].setMinimumFractionDigits(0);
    numFormat[1].setMaximumFractionDigits(1);
    numFormat[1].setMinimumFractionDigits(1);
    numFormat[2].setMaximumFractionDigits(2);
    numFormat[2].setMinimumFractionDigits(2);
  }

  /**
   * Format a number, padded from the left (right-aligned).
   * @param numFracDigits number of fraction digits, must be 0 or 1 or 2.
   * @param f value to format.
   * @param col column title; its length is the output width.
   * @return the formatted value, exactly <code>col.length()</code> characters long.
   */
  public static String format(int numFracDigits, float f, String col) {
    return padLeft(numFormat[numFracDigits].format(f), col.length());
  }

  /**
   * Format a number, padded from the right (left-aligned).
   * @param numFracDigits number of fraction digits, must be 0 or 1 or 2.
   * @param f value to format.
   * @param col column title; its length is the output width.
   * @return the formatted value, exactly <code>col.length()</code> characters long.
   */
  public static String formatpaddRight(int numFracDigits, float f, String col) {
    return padRight(numFormat[numFracDigits].format(f), col.length());
  }

  /**
   * Format an int, padded from the left (right-aligned).
   * @param n value to format.
   * @param col column title; its length is the output width.
   * @return the formatted value, exactly <code>col.length()</code> characters long.
   */
  public static String format(int n, String col) {
    return padLeft(String.valueOf(n), col.length());
  }

  /**
   * Format a string, padded from the right (left-aligned).
   * @param s text to format.
   * @param col column title; its length is the output width.
   * @return the text, exactly <code>col.length()</code> characters long.
   */
  public static String format(String s, String col) {
    return padRight(String.valueOf(s), col.length());
  }

  /**
   * Format a string, padded from the left (right-aligned).
   * @param s text to format.
   * @param col column title; its length is the output width.
   * @return the text, exactly <code>col.length()</code> characters long.
   */
  public static String formatPaddLeft(String s, String col) {
    return padLeft(String.valueOf(s), col.length());
  }

  // Right-align s in width characters; keeps the rightmost characters if s is too long.
  private static String padLeft(String s, int width) {
    int len = s.length();
    if (len >= width) {
      return s.substring(len - width);
    }
    StringBuilder sb = new StringBuilder(width);
    for (int i = len; i < width; i++) {
      sb.append(' ');
    }
    return sb.append(s).toString();
  }

  // Left-align s in width characters; keeps the leftmost characters if s is too long.
  private static String padRight(String s, int width) {
    int len = s.length();
    if (len >= width) {
      return s.substring(0, width);
    }
    StringBuilder sb = new StringBuilder(width);
    sb.append(s);
    for (int i = len; i < width; i++) {
      sb.append(' ');
    }
    return sb.toString();
  }

}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.FileReader; +import java.io.StreamTokenizer; +import java.util.ArrayList; + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.tasks.PerfTask; +import org.apache.lucene.benchmark.byTask.tasks.RepSumByPrefTask; +import org.apache.lucene.benchmark.byTask.tasks.TaskSequence; + +/** + * Test algorithm, as read from file + */ +public class Algorithm { + + private TaskSequence sequence = new TaskSequence(null,false); + + /** + * Read algorithm from file + * @param algFile file containing perf test instructions + * @param runData perf-run-data used at running the tasks. 
+ * @throws Exception if errors while parsing the algorithm + */ + public Algorithm (File algFile, PerfRunData runData) throws Exception { + int maxDepthLogStart = runData.getConfig().get("task.max.depth.log",0); + TaskSequence currSequence = sequence; + PerfTask prevTask = null; + StreamTokenizer stok = new StreamTokenizer(new FileReader(algFile)); + stok.commentChar('#'); + stok.eolIsSignificant(false); + stok.ordinaryChar('"'); + stok.ordinaryChar('/'); + stok.ordinaryChar('('); + stok.ordinaryChar(')'); + boolean colonOk = false; + currSequence.setDepth(0,maxDepthLogStart); + String taskPackage = PerfTask.class.getPackage().getName() + "."; + + while (stok.nextToken() != StreamTokenizer.TT_EOF) { + switch(stok.ttype) { + + case StreamTokenizer.TT_WORD: + String s = stok.sval; + PerfTask task = (PerfTask) Class.forName(taskPackage+s+"Task").newInstance(); + task.setRunData(runData); + currSequence.addTask(task); + if (task instanceof RepSumByPrefTask) { + stok.nextToken(); + String prefix = stok.sval; + if (prefix==null || prefix.length()==0) { + throw new Exception("named report prefix problem - "+stok.toString()); + } + ((RepSumByPrefTask) task).setPrefix(prefix); + } + // check for task param: '(' someParam ')' + stok.nextToken(); + if (stok.ttype!='(') { + stok.pushBack(); + } else { + // get param, for tasks that supports 9a single) parameter. 
+ stok.nextToken(); + switch (stok.ttype) { + case StreamTokenizer.TT_NUMBER: + task.setDoubleParam(stok.nval); + System.out.println("Number Param "+stok.nval+" for: "+task.getName()); + break; + case StreamTokenizer.TT_WORD: + task.setStringParam(stok.sval); + System.out.println("String Param "+stok.sval+" for: "+task.getName()); + break; + case StreamTokenizer.TT_EOF: + throw new Exception("unexpexted EOF: - "+stok.toString()); + default: + task.setCharParam((char)stok.ttype); + System.out.println("String Param "+stok.ttype+" for: "+task.getName()); + } + // verify closing ')' + stok.nextToken(); + } + + // --------------------------------------- + colonOk = false; prevTask = task; + break; + + default: + char c = (char)stok.ttype; + + switch(c) { + + case ':' : + if (!colonOk) throw new Exception("colon unexpexted: - "+stok.toString()); + colonOk = false; + // get repetitions number + stok.nextToken(); + if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted repetitions number: - "+stok.toString()); + ((TaskSequence)prevTask).setRepetitions((int)stok.nval); + // check for rate specification (ops/min) + stok.nextToken(); + if (stok.ttype!=':') { + stok.pushBack(); + } else { + // get rate number + stok.nextToken(); + if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted rate number: - "+stok.toString()); + // check for unit - min or sec, sec is default + stok.nextToken(); + if (stok.ttype!='/') { + stok.pushBack(); + ((TaskSequence)prevTask).setRate((int)stok.nval,false); // set rate per sec + } else { + stok.nextToken(); + if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expexted rate unit: 'min' or 'sec' - "+stok.toString()); + String unit = stok.sval.toLowerCase(); + if ("min".equals(unit)) { + ((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min + } else if ("sec".equals(unit)) { + ((TaskSequence)prevTask).setRate((int)stok.nval,false); // set rate per sec + } else { + throw new 
Exception("expexted rate unit: 'min' or 'sec' - "+stok.toString()); + } + } + } + colonOk = false; + break; + + case '{' : + case '[' : + // start serial sequence + TaskSequence seq2 = new TaskSequence(currSequence, c=='['); + seq2.setRunData(runData); + currSequence.addTask(seq2); + currSequence = seq2; + // check for sequence name + stok.nextToken(); + if (stok.ttype!='"') { + stok.pushBack(); + } else { + stok.nextToken(); + String name = stok.sval; + seq2.setName(name); + stok.nextToken(); + if (stok.ttype!='"' || name==null || name.length()==0) { + throw new Exception("sequence name problem - "+stok.toString()); + } + } + colonOk = false; + break; + + case '>' : + currSequence.setNoChildReport(); + case '}' : + case ']' : + // end sequence + colonOk = true; prevTask = currSequence; + currSequence = currSequence.getParent(); + break; + + } //switch(c) + break; + + } //switch(stok.ttype) + + } + + if (sequence != currSequence) { + throw new Exception("Unmatched sequences"); + } + + // remove redundant top level enclosing sequences + while (sequence.getRepetitions()==1 && sequence.getRate()==0) { + ArrayList t = sequence.getTasks(); + if (t!=null && t.size()==1) { + PerfTask p = (PerfTask) t.get(0); + if (p instanceof TaskSequence) { + sequence = (TaskSequence) p; + continue; + } + } + break; + } + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + String newline = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(); + sb.append(sequence.toString()); + sb.append(newline); + return sb.toString(); + } + + /** + * Execute this algorithm + * @throws Exception + */ + public void execute() throws Exception { + sequence.doLogic(); + } + + +} + Index: src/java/org/apache/lucene/benchmark/byTask/utils/Config.java =================================================================== --- src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (revision 0) +++ 
src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (revision 0) @@ -0,0 +1,239 @@ +package org.apache.lucene.benchmark.byTask.utils; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Properties; +import java.util.StringTokenizer; + +/** + * Perf run configuration properties. + * Numeric peroperty containing ".", e.g. "10.100.5" is interpreted + * as array of numeric values. It is extracted once, on first use, and + * maintain an round number to return the appropriate value. + */ +public class Config { + + private int roundNumber = 0; + private Properties props; + private HashMap valByRound = new HashMap(); + private HashMap colForValByRound = new HashMap(); + + public Config (Properties props) { + this.props = props; + } + + /** + * Return a string property. + * @param name name of property. + * @param dflt default value. + * @return a string property. + */ + public String get (String name, String dflt) { + return props.getProperty(name,dflt); + } + + /** + * Return an int property. + * If the property contain ".", e.g. "10.100.5", it is interpreted + * as array of ints. 
It is extracted once, on first call + * to get() it, and a by-round-value is returned. + * @param name name of property + * @param dflt default value + * @return a int property. + */ + public int get (String name, int dflt) { + // use value by round if already parsed + int vals[] = (int[]) valByRound.get(name); + if (vals != null) { + return vals[roundNumber % vals.length]; + } + // done if not by round + String sval = props.getProperty(name,""+dflt); + if (sval.indexOf(".")<0) { + return Integer.parseInt(sval); + } + // first time this prop is extracted by round + int k = sval.indexOf("."); + String colName = sval.substring(0,k); + sval = sval.substring(k+1); + colForValByRound.put(name,colName); + vals = propToIntArray(sval); + valByRound.put(name,vals); + return vals[roundNumber % vals.length]; + } + + /** + * Return a boolean property. + * If the property contain ".", e.g. "true.true.false", it is interpreted + * as array of boleans. It is extracted once, on first call + * to get() it, and a by-round-value is returned. + * @param name name of property + * @param dflt default value + * @return a int property. + */ + public boolean get (String name, boolean dflt) { + // use value by round if already parsed + boolean vals[] = (boolean[]) valByRound.get(name); + if (vals != null) { + return vals[roundNumber % vals.length]; + } + // done if not by round + String sval = props.getProperty(name,""+dflt); + if (sval.indexOf(".")<0) { + return Boolean.parseBoolean(sval); + } + // first time this prop is extracted by round + int k = sval.indexOf("."); + String colName = sval.substring(0,k); + sval = sval.substring(k+1); + colForValByRound.put(name,colName); + vals = propToBooleanArray(sval); + valByRound.put(name,vals); + return vals[roundNumber % vals.length]; + } + + /** + * Increment the round number, for config values that are extracted by round number. + * @return the new round number. 
+ */ + public int newRound () { + roundNumber++; + + // log changes in values + if (valByRound.size()>0) { + StringBuffer sb = new StringBuffer("--> Round ").append(roundNumber-1).append("-->").append(roundNumber).append(": "); + for (Iterator iter = valByRound.keySet().iterator(); iter.hasNext();) { + String name = (String) iter.next(); + Object a = valByRound.get(name); + if (a instanceof int[]) { + int ai[] = (int[]) a; + int n1 = (roundNumber-1)%ai.length; + int n2 = roundNumber%ai.length; + sb.append(" ").append(name).append(":").append(ai[n1]).append("-->").append(ai[n2]); + } else { + boolean ab[] = (boolean[]) a; + int n1 = (roundNumber-1)%ab.length; + int n2 = roundNumber%ab.length; + sb.append(" ").append(name).append(":").append(ab[n1]).append("-->").append(ab[n2]); + } + } + System.out.println(); + System.out.println(sb.toString()); + System.out.println(); + } + + return roundNumber; + } + + // extract properties to array, e.g. for "10.100.5" return int[]{10,100,5}. + private int[] propToIntArray (String s) { + if (!s.contains(".")) { + return new int [] { Integer.parseInt(s) }; + } + + ArrayList a = new ArrayList(); + StringTokenizer st = new StringTokenizer(s,"."); + while (st.hasMoreTokens()) { + String t = st.nextToken(); + a.add(new Integer(t)); + } + int res[] = new int[a.size()]; + for (int i=0; i + +Utilities used for the benchmark, and for the reports. 
+ + Index: src/java/org/apache/lucene/benchmark/standard/StandardOptions.java =================================================================== --- src/java/org/apache/lucene/benchmark/standard/StandardOptions.java (revision 475655) +++ src/java/org/apache/lucene/benchmark/standard/StandardOptions.java (working copy) @@ -44,7 +44,6 @@ /** * How often to print out log messages when in benchmark loops - * @return */ public int getLogStep() { @@ -58,7 +57,6 @@ /** * The number of times to run the benchmark - * @return */ public int getRunCount() { @@ -71,8 +69,7 @@ } /** - * - * @return + * Return the scale up. */ public int getScaleUp() { Index: src/java/org/apache/lucene/benchmark/stats/TestRunData.java =================================================================== --- src/java/org/apache/lucene/benchmark/stats/TestRunData.java (revision 475655) +++ src/java/org/apache/lucene/benchmark/stats/TestRunData.java (working copy) @@ -123,7 +123,7 @@ return v; } - /** Get memory usage stats. for a given data type. */ + /** Get memory usage stats for a given data type. */ public MemUsage getMemUsage(String label) { Vector v = (Vector) data.get(label); if (v == null)