Index: contrib/benchmark/CHANGES.txt =================================================================== --- contrib/benchmark/CHANGES.txt (revision 797694) +++ contrib/benchmark/CHANGES.txt (working copy) @@ -4,6 +4,16 @@ $Id:$ +7/24/2009 + LUCENE-1595: Deprecate LineDocMaker and EnwikiDocMaker in favor of + using DocMaker directly, with content.source = LineDocSource or + EnwikiContentSource. NOTE: with this change, the "id" field from + the Wikipedia XML export is now indexed as the "docname" field + (previously it was indexed as "docid"). Additionally, the + SearchWithSort task now accepts all types that SortField can accept + and no longer falls back to SortField.AUTO, which has been + deprecated. (Mike McCandless) + 7/20/2009 LUCENE-1755: Fix WriteLineDocTask to output a document if it contains either a title or body (or both). (Shai Erera via Mark Miller) Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 797694) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -299,7 +299,7 @@ } /** - * Test WriteLineDoc and LineDocMaker. + * Test WriteLineDoc and LineDocSource. 
*/ public void testLineDocFile() throws Exception { File lineFile = new File(System.getProperty("tempDir"), "test.reuters.lines.txt"); @@ -334,7 +334,7 @@ String algLines2[] = { "# ----- properties ", "analyzer=org.apache.lucene.analysis.SimpleAnalyzer", - "doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker", + "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'), "content.source.forever=false", "doc.reuse.fields=false", @@ -355,7 +355,7 @@ iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory()); - assertEquals(numLines + " lines were were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs()); + assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs()); ir.close(); lineFile.delete(); Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (revision 797694) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (working copy) @@ -172,7 +172,7 @@ public void testCharsReplace() throws Exception { // WriteLineDocTask replaced only \t characters w/ a space, since that's its // separator char. However, it didn't replace newline characters, which - // resulted in errors in LineDocMaker. + // resulted in errors in LineDocSource. 
File file = new File(getWorkDir(), "one-line"); PerfRunData runData = createPerfRunData(file, false, null, NewLinesDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java (revision 797694) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java (working copy) @@ -1,145 +0,0 @@ -package org.apache.lucene.benchmark.byTask.feeds; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.util.Properties; - -import org.apache.commons.compress.compressors.CompressorStreamFactory; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.benchmark.BenchmarkTestCase; -import org.apache.lucene.benchmark.byTask.PerfRunData; -import org.apache.lucene.benchmark.byTask.tasks.AddDocTask; -import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask; -import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask; -import org.apache.lucene.benchmark.byTask.tasks.TaskSequence; -import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; -import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; - -/** Tests the functionality of {@link LineDocMaker}. 
*/ -public class LineDocMakerTest extends BenchmarkTestCase { - - private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); - - private void createBZ2LineFile(File file) throws Exception { - OutputStream out = new FileOutputStream(file); - out = csFactory.createCompressorOutputStream("bzip2", out); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); - StringBuffer doc = new StringBuffer(); - doc.append("title").append(WriteLineDocTask.SEP).append("date").append(WriteLineDocTask.SEP).append("body"); - writer.write(doc.toString()); - writer.newLine(); - writer.close(); - } - - private void createRegularLineFile(File file) throws Exception { - OutputStream out = new FileOutputStream(file); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); - StringBuffer doc = new StringBuffer(); - doc.append("title").append(WriteLineDocTask.SEP).append("date").append(WriteLineDocTask.SEP).append("body"); - writer.write(doc.toString()); - writer.newLine(); - writer.close(); - } - - private void doIndexAndSearchTest(File file, boolean setBZCompress, - String bz2CompressVal) throws Exception { - - Properties props = new Properties(); - - // LineDocMaker specific settings. - props.setProperty("docs.file", file.getAbsolutePath()); - if (setBZCompress) { - props.setProperty("bzip.compression", bz2CompressVal); - } - - // Indexing configuration. 
- props.setProperty("analyzer", SimpleAnalyzer.class.getName()); - props.setProperty("doc.maker", LineDocMaker.class.getName()); - props.setProperty("directory", "RAMDirectory"); - - // Create PerfRunData - Config config = new Config(props); - PerfRunData runData = new PerfRunData(config); - - TaskSequence tasks = new TaskSequence(runData, "testBzip2", null, false); - tasks.addTask(new CreateIndexTask(runData)); - tasks.addTask(new AddDocTask(runData)); - tasks.addTask(new CloseIndexTask(runData)); - tasks.doLogic(); - - IndexSearcher searcher = new IndexSearcher(runData.getDirectory(), true); - TopDocs td = searcher.search(new TermQuery(new Term("body", "body")), 10); - assertEquals(1, td.totalHits); - assertNotNull(td.scoreDocs[0]); - searcher.close(); - } - - /* Tests LineDocMaker with a bzip2 input stream. */ - public void testBZip2() throws Exception { - File file = new File(getWorkDir(), "one-line.bz2"); - createBZ2LineFile(file); - doIndexAndSearchTest(file, true, "true"); - } - - public void testBZip2AutoDetect() throws Exception { - File file = new File(getWorkDir(), "one-line.bz2"); - createBZ2LineFile(file); - doIndexAndSearchTest(file, false, null); - } - - public void testRegularFile() throws Exception { - File file = new File(getWorkDir(), "one-line"); - createRegularLineFile(file); - doIndexAndSearchTest(file, false, null); - } - - public void testInvalidFormat() throws Exception { - String[] testCases = new String[] { - "", // empty line - "title", // just title - "title" + WriteLineDocTask.SEP, // title + SEP - "title" + WriteLineDocTask.SEP + "body", // title + SEP + body - // note that title + SEP + body + SEP is a valid line, which results in an - // empty body - }; - - for (int i = 0; i < testCases.length; i++) { - File file = new File(getWorkDir(), "one-line"); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "utf-8")); - writer.write(testCases[i]); - writer.newLine(); - writer.close(); - try { - 
doIndexAndSearchTest(file, false, null); - fail("Some exception should have been thrown for: [" + testCases[i] + "]"); - } catch (Exception e) { - // expected. - } - } - } - -} Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (revision 797694) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (working copy) @@ -39,8 +39,8 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -/** Tests the functionality of {@link LineDocMaker}. */ -public class LineDocMakerTest extends BenchmarkTestCase { +/** Tests the functionality of {@link LineDocSource}. */ +public class LineDocSourceTest extends BenchmarkTestCase { private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); @@ -70,7 +70,7 @@ Properties props = new Properties(); - // LineDocMaker specific settings. + // LineDocSource specific settings. props.setProperty("docs.file", file.getAbsolutePath()); if (setBZCompress) { props.setProperty("bzip.compression", bz2CompressVal); @@ -78,7 +78,7 @@ // Indexing configuration. props.setProperty("analyzer", SimpleAnalyzer.class.getName()); - props.setProperty("doc.maker", LineDocMaker.class.getName()); + props.setProperty("content.source", LineDocSource.class.getName()); props.setProperty("directory", "RAMDirectory"); // Create PerfRunData @@ -98,7 +98,7 @@ searcher.close(); } - /* Tests LineDocMaker with a bzip2 input stream. */ + /* Tests LineDocSource with a bzip2 input stream. 
*/ public void testBZip2() throws Exception { File file = new File(getWorkDir(), "one-line.bz2"); createBZ2LineFile(file); Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (revision 797694) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (working copy) @@ -56,6 +56,8 @@ SortField sortField0; if (field.equals("doc")) { sortField0 = SortField.FIELD_DOC; + } else if (field.equals("score")) { + sortField0 = SortField.FIELD_SCORE; } else if (field.equals("noscore")) { doScore = false; continue; @@ -90,14 +92,22 @@ int type; if (typeString.equals("float")) { type = SortField.FLOAT; + } else if (typeString.equals("double")) { + type = SortField.DOUBLE; + } else if (typeString.equals("byte")) { + type = SortField.BYTE; + } else if (typeString.equals("short")) { + type = SortField.SHORT; } else if (typeString.equals("int")) { type = SortField.INT; + } else if (typeString.equals("long")) { + type = SortField.LONG; } else if (typeString.equals("string")) { type = SortField.STRING; } else if (typeString.equals("string_val")) { type = SortField.STRING_VAL; } else { - type = SortField.AUTO; + throw new RuntimeException("Unrecognized sort field type " + typeString); } return type; } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 797694) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy) @@ -24,6 +24,7 @@ import java.util.Map; import java.util.Properties; import java.util.Map.Entry; +import java.util.Random; import org.apache.lucene.benchmark.byTask.utils.Config; import 
org.apache.lucene.benchmark.byTask.utils.Format; @@ -60,6 +61,9 @@ *