Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java (revision 558317) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java (working copy) @@ -158,6 +158,8 @@ private void doTestAllTasksSimpleParse(boolean parOrSeq, boolean par) { for (int i = 0; i < singleTaskAlgs.length; i++) { String testedTask = singleTaskAlgs[i]; + if (testedTask.equals(" WriteLineDoc ")) + continue; if (parOrSeq) { if (par) { testedTask = "[ " + testedTask + " ] : 2"; Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (revision 558317) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java (working copy) @@ -22,6 +22,8 @@ import java.io.IOException; import java.io.Reader; import java.util.ArrayList; +import java.util.List; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Properties; @@ -110,7 +112,9 @@ private void printProps() { System.out.println("------------> config properties:"); - for (Iterator it = props.keySet().iterator(); it.hasNext();) { + List propKeys = new ArrayList(props.keySet()); + Collections.sort(propKeys); + for (Iterator it = propKeys.iterator(); it.hasNext();) { String propName = (String) it.next(); System.out.println(propName + " = " + props.getProperty(propName)); } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 558317) +++ 
contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (working copy) @@ -38,6 +38,8 @@ public static final int DEFAULT_MAX_BUFFERED = 10; public static final int DEFAULT_MAX_FIELD_LENGTH = 10000; public static final int DEFAULT_MERGE_PFACTOR = 10; + public static final int DEFAULT_RAM_FLUSH_MB = 0; + public static final boolean DEFAULT_AUTO_COMMIT = true; public OpenIndexTask(PerfRunData runData) { super(runData); Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 558317) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (working copy) @@ -42,19 +42,23 @@ Directory dir = getRunData().getDirectory(); Analyzer analyzer = getRunData().getAnalyzer(); - IndexWriter iw = new IndexWriter(dir, analyzer, true); - Config config = getRunData().getConfig(); boolean cmpnd = config.get("compound",true); int mrgf = config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR); int mxbf = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED); int mxfl = config.get("max.field.length",OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH); + double flushAtRAMUsage = config.get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB); + boolean autoCommit = config.get("autocommit", OpenIndexTask.DEFAULT_AUTO_COMMIT); + IndexWriter iw = new IndexWriter(dir, autoCommit, analyzer, true); + iw.setUseCompoundFile(cmpnd); iw.setMergeFactor(mrgf); iw.setMaxBufferedDocs(mxbf); iw.setMaxFieldLength(mxfl); + if (flushAtRAMUsage > 0) + iw.setRAMBufferSizeMB(flushAtRAMUsage); getRunData().setIndexWriter(iw); return 1; Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java =================================================================== --- 
contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 558317)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (working copy)
@@ -20,6 +20,7 @@
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
 
 
 /**
@@ -71,7 +72,8 @@
   }
 
   public int doLogic() throws Exception {
-    getRunData().getIndexWriter().addDocument(doc);
+    IndexWriter writer = getRunData().getIndexWriter();
+    writer.addDocument(doc);
     return 1;
   }
 
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java (revision 0)
@@ -0,0 +1,156 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.BasicDocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+
+/**
+ * Writes the documents produced by the configured DocMaker to a text file,
+ * one document per line, in the format: title SEP date SEP body.
+ * Tabs inside the field values are replaced by spaces.
+ *
+ * Config properties:
+ * line.file.out=&lt;path of the file to write&gt; (required)
+ * doc.writeline.log.step=&lt;log a progress message every N docs&gt;
+ */
+public class WriteLineDocTask extends PerfTask {
+
+  /**
+   * Default value for property doc.writeline.log.step - indicating how often
+   * a "processed N docs" message should be logged.
+   */
+  public static final int DEFAULT_WRITELINE_DOC_LOG_STEP = 1000;
+
+  /**
+   * Opens the file named by property line.file.out for writing.
+   * @throws RuntimeException if line.file.out is not set or the file cannot be opened.
+   */
+  public WriteLineDocTask(PerfRunData runData) {
+    super(runData);
+    Config config = runData.getConfig();
+    String fileName = config.get("line.file.out", null);
+    if (fileName == null)
+      throw new RuntimeException("line.file.out must be set");
+    try {
+      lineFileOut = new BufferedWriter(new FileWriter(fileName));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private int logStep = -1;
+  private int docSize = 0;
+  int count = 0;
+  private BufferedWriter lineFileOut;
+
+  public final static String SEP = "\t";
+
+  /**
+   * Makes the next document and, if it has a body field, writes it to the
+   * output file as a single line. A missing title or date is written as an
+   * empty string.
+   */
+  public void setup() throws Exception {
+    super.setup();
+    DocMaker docMaker = getRunData().getDocMaker();
+    Document doc;
+    if (docSize > 0) {
+      doc = docMaker.makeDocument(docSize);
+    } else {
+      doc = docMaker.makeDocument();
+    }
+
+    Field f = doc.getField(BasicDocMaker.BODY_FIELD);
+
+    // tabs are replaced so that a field value can never contain the separator
+    String body, title, date;
+    if (f != null)
+      body = f.stringValue().replace('\t', ' ');
+    else
+      body = null;
+
+    f = doc.getField(BasicDocMaker.TITLE_FIELD);
+    if (f != null)
+      title = f.stringValue().replace('\t', ' ');
+    else
+      title = "";
+
+    f = doc.getField(BasicDocMaker.DATE_FIELD);
+    if (f != null)
+      date = f.stringValue().replace('\t', ' ');
+    else
+      date = "";
+
+    if (body != null) {
+      lineFileOut.write(title, 0, title.length());
+      lineFileOut.write(SEP);
+      lineFileOut.write(date, 0, date.length());
+      lineFileOut.write(SEP);
+      lineFileOut.write(body, 0, body.length());
+      lineFileOut.newLine();
+      // NOTE(review): this class never closes the writer, so presumably this flush is what gets each line written out
+      lineFileOut.flush();
+    }
+  }
+
+  public void tearDown() throws Exception {
+    log(++count);
+    super.tearDown();
+  }
+
+  public int doLogic() throws Exception {
+    return 1;
+  }
+
+  private void log (int count) {
+    if (logStep<0) {
+      // init once per instance
+      logStep = getRunData().getConfig().get("doc.writeline.log.step", DEFAULT_WRITELINE_DOC_LOG_STEP);
+    }
+    if (logStep>0 && (count%logStep)==0) {
+      System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs");
+    }
+  }
+
+  /**
+   * Set the params (docSize only)
+   * @param params docSize, or 0 for no limit.
+   */
+  public void setParams(String params) {
+    super.setParams(params);
+    docSize = (int) Float.parseFloat(params);
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
+   */
+  public boolean supportsParams() {
+    return true;
+  }
+}

Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirDocMaker.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirDocMaker.java (revision 558317)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirDocMaker.java (working copy)
@@ -40,7 +40,7 @@
  */
 public class DirDocMaker extends BasicDocMaker {
 
-  private DateFormat dateFormat;
+  private ThreadLocal dateFormat = new ThreadLocal();
   private File dataDir = null;
   private int iteration=0;
   
@@ -148,11
+148,21 @@ if (inputFiles==null) { throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); } - // date format: 30-MAR-1987 14:22:36 - dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss",Locale.US); - dateFormat.setLenient(true); } + // get/initiate a thread-local simple date format (must do so + // because SimpleDateFormat is not thread-safe). + protected DateFormat getDateFormat () { + DateFormat df = (DateFormat) dateFormat.get(); + if (df == null) { + // date format: 30-MAR-1987 14:22:36.87 + df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US); + df.setLenient(true); + dateFormat.set(df); + } + return df; + } + protected DocData getNextDocData() throws Exception { File f = null; String name = null; @@ -184,7 +194,7 @@ reader.close(); addBytes(f.length()); - Date date = dateFormat.parse(dateStr.trim()); + Date date = getDateFormat().parse(dateStr.trim()); return new DocData(name, bodyBuf.toString(), title, null, date); } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (revision 558317) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (working copy) @@ -55,7 +55,9 @@ // leftovers are thread local, because it is unsafe to share residues between threads private ThreadLocal leftovr = new ThreadLocal(); - static final String BODY_FIELD = "body"; + public static final String BODY_FIELD = "body"; + public static final String TITLE_FIELD = "doctitle"; + public static final String DATE_FIELD = "docdate"; private long numBytes = 0; private long numUniqueBytes = 0; @@ -104,10 +106,10 @@ } if (docData.getDate()!=null) { String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND); - doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal)); + 
doc.add(new Field(DATE_FIELD, dateStr, storeVal, indexVal, termVecVal));
     }
     if (docData.getTitle()!=null) {
-      doc.add(new Field("doctitle", docData.getTitle(), storeVal, indexVal, termVecVal));
+      doc.add(new Field(TITLE_FIELD, docData.getTitle(), storeVal, indexVal, termVecVal));
     }
     if (docData.getBody()!=null && docData.getBody().length()>0) {
       String bdy;
@@ -188,7 +190,18 @@
     boolean termVec = config.get("doc.term.vector",false);
     storeVal = (stored ? Field.Store.YES : Field.Store.NO);
     indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
-    termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
+    boolean termVecPositions = config.get("doc.term.vector.positions",false);
+    boolean termVecOffsets = config.get("doc.term.vector.offsets",false);
+    if (termVecPositions && termVecOffsets)
+      termVecVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
+    else if (termVecPositions)
+      termVecVal = Field.TermVector.WITH_POSITIONS;
+    else if (termVecOffsets)
+      termVecVal = Field.TermVector.WITH_OFFSETS;
+    else if (termVec)
+      termVecVal = Field.TermVector.YES;
+    else
+      termVecVal = Field.TermVector.NO;
     storeBytes = config.get("doc.store.body.bytes", false);
     forever = config.get("doc.maker.forever",true);
   }
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java (revision 0)
@@ -0,0 +1,188 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.TokenStream;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.File;
+import java.io.Reader;
+import java.io.FileReader;
+import java.io.FileFilter;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Locale;
+import java.util.Iterator;
+import java.util.Stack;
+import java.util.Comparator;
+import java.util.Arrays;
+
+
+/**
+ * A DocMaker reading one line at a time as a Document from
+ * a single file. Each line must hold title, date and body, in that
+ * order, separated by WriteLineDocTask.SEP.
+ *
+ * Config properties:
+ * docs.file=&lt;path to the file&gt;
+ */
+public class LineDocMaker extends BasicDocMaker {
+
+  private BufferedReader fileIn;
+
+  // a DocState per thread, so that each thread reuses its own Document and Fields
+  private ThreadLocal docState = new ThreadLocal();
+
+  private String fileName;
+
+  /** A Document with its three Fields; the Field values are replaced for every line. */
+  private class DocState {
+    Document doc;
+    Field bodyField;
+    Field titleField;
+    Field dateField;
+
+    public DocState() {
+
+      bodyField = new Field(BasicDocMaker.BODY_FIELD,
+                            "",
+                            storeVal,
+                            Field.Index.TOKENIZED,
+                            termVecVal);
+      titleField = new Field(BasicDocMaker.TITLE_FIELD,
+                             "",
+                             storeVal,
+                             Field.Index.TOKENIZED,
+                             termVecVal);
+      // the date has its own field name; it must not be added under TITLE_FIELD
+      dateField = new Field(BasicDocMaker.DATE_FIELD,
+                            "",
+                            storeVal,
+                            Field.Index.TOKENIZED,
+                            termVecVal);
+
+      doc = new Document();
+      doc.add(bodyField);
+      doc.add(titleField);
+      doc.add(dateField);
+    }
+
+    final static String SEP = WriteLineDocTask.SEP;
+
+    /**
+     * Splits the line at its first two separators and sets the title, date
+     * and body field values from the three parts.
+     * @param line a line in the format: title SEP date SEP body
+     * @return the document of this DocState, which is reused between calls
+     * @throws RuntimeException if the line holds fewer than two separators
+     */
+    public Document setFields(String line) {
+      // title <SEP> date <SEP> body
+      int spot = line.indexOf(SEP);
+      int spot2 = line.indexOf(SEP, 1+spot);
+      if (spot == -1 || spot2 == -1)
+        throw new RuntimeException("line: [" + line + "] is in an invalid format !");
+      titleField.setValue(line.substring(0, spot));
+      dateField.setValue(line.substring(1+spot, spot2));
+      bodyField.setValue(line.substring(1+spot2, line.length()));
+      return doc;
+    }
+  }
+
+  /**
+   * Passes the config to the superclass, then (re)opens the input file.
+   */
+  public void setConfig(Config config) {
+    super.setConfig(config);
+    resetInputs();
+  }
+
+  protected DocData getNextDocData() throws Exception {
+    throw new RuntimeException("not implemented");
+  }
+
+  private DocState getDocState() {
+    DocState ds = (DocState) docState.get();
+    if (ds == null) {
+      ds = new DocState();
+      docState.set(ds);
+    }
+    return ds;
+  }
+
+  /**
+   * Reads the next line of the file and returns the calling thread's document
+   * with its fields set from that line. At end of file the file is reopened
+   * when forever is set; otherwise a NoMoreDataException is thrown.
+   */
+  public Document makeDocument() throws Exception {
+
+    String line;
+    synchronized(this) {
+      while(true) {
+        line = fileIn.readLine();
+        if (line == null) {
+          if (!forever)
+            throw new NoMoreDataException();
+          else {
+            // Reset the file
+            openFile();
+          }
+        } else {
+          break;
+        }
+      }
+    }
+
+    return getDocState().setFields(line);
+  }
+
+  public Document makeDocument(int size) throws Exception {
+    throw new RuntimeException("cannot change document size with LineDocMaker; please use DirDocMaker instead");
+  }
+
+  public synchronized void resetInputs() {
+    super.resetInputs();
+    fileName = config.get("docs.file", null);
+    if (fileName == null)
+      throw new RuntimeException("docs.file must be set");
+    openFile();
+  }
+
+  private void openFile() {
+    try {
+      if (fileIn != null)
+        fileIn.close();
+      fileIn = new BufferedReader(new FileReader(fileName));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public int numUniqueTexts() {
+    return -1;
+  }
+}

Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 558317)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (working copy)
@@ -77,6 +77,7 @@
     // analyzer (default is standard analyzer)
     analyzer = (Analyzer) Class.forName(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
+
     // doc maker
     docMaker = (DocMaker) Class.forName(config.get("doc.maker",
         "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker")).newInstance();
Index: contrib/benchmark/build.xml
===================================================================
--- contrib/benchmark/build.xml (revision 558317)
+++ contrib/benchmark/build.xml (working copy)
@@ -147,13 +147,14 @@ - + Working Directory: ${working.dir} +