+ * There are two main configurations for running a quality benchmark:
n.
+ * @param n requested precision point, must be at least 1 and at most {@link #MAX_POINTS}.
+ */
+ public double getPrecisionAt(int n) {
+ if (n<1 || n>MAX_POINTS) {
+ throw new IllegalArgumentException("n="+n+" - but it must be in [1,"+MAX_POINTS+"] range!");
+ }
+ if (n>numPoints) {
+ return (numPoints * pAt[(int)numPoints])/n;
+ }
+ return pAt[n];
+ }
+
+ /**
+  * Return the average precision at recall points: the accumulated sum of
+  * precision at recall points divided by maxGoodPoints, or 0 when
+  * maxGoodPoints is 0.
+  */
+ public double getAvp() {
+   if (maxGoodPoints==0) {
+     return 0;
+   }
+   return pReleventSum/maxGoodPoints;
+ }
+
+ /**
+  * Return the recall value held by this object.
+  * NOTE(review): recall is conventionally
+  * |{relevant hits found}| / |{relevant docs existing}|; confirm against
+  * the code that assigns the field.
+  */
+ public double getRecall() {
+   return this.recall;
+ }
+
+ /**
+ * Log information on this QualityStats object.
+ * @param logger Logger.
+ * @param prefix prefix before each log line.
+ */
+ public void log(String title, int paddLines, PrintWriter logger, String prefix) {
+ for (int i=0; i+ * <top> + * <num> Number: nnn + * + * <title> title of the topic + * + * <desc> Description: + * description of the topic + * + * <narr> Narrative: + * "story" composed by assessors. + * + * </top> + *+ * Comment lines starting with '#' are ignored. + */ +public class TrecTopicsReader { + + private static final String newline = System.getProperty("line.separator"); + + /** + * Constructor for Trec's TopicsReader + */ + public TrecTopicsReader() { + super(); + } + + /** + * Read quality queries from trec format topics file. + * @param reader where queries are read from. + * @return the result quality queries. + * @throws IOException if cannot read the queries. + */ + public QualityQuery[] readQueries(BufferedReader reader) throws IOException { + ArrayList res = new ArrayList(); + StringBuffer sb; + try { + while (null!=(sb=read(reader,"
+ * Expected input format: + *
+ * qnum 0 doc-name is-relevant + *+ * Two sample lines: + *
+ * 19 0 doc303 1 + * 19 0 doc7295 0 + *+ * @param reader where judgments are read from. + * @throws IOException + */ + public TrecJudge (BufferedReader reader) throws IOException { + judgements = new HashMap(); + QRelJudgement curr = null; + String zero = "0"; + String line; + + try { + while (null!=(line=reader.readLine())) { + line = line.trim(); + if (line.length()==0 || '#'==line.charAt(0)) { + continue; + } + StringTokenizer st = new StringTokenizer(line); + String queryID = st.nextToken(); + st.nextToken(); + String docName = st.nextToken(); + boolean relevant = !zero.equals(st.nextToken()); + assert !st.hasMoreTokens() : "wrong format: "+line+" next: "+st.nextToken(); + if (relevant) { // only keep relevant docs + if (curr==null || !curr.queryID.equals(queryID)) { + curr = (QRelJudgement)judgements.get(queryID); + if (curr==null) { + curr = new QRelJudgement(queryID); + judgements.put(queryID,curr); + } + } + curr.addRelevandDoc(docName); + } + } + } finally { + reader.close(); + } + } + + // inherit javadocs + public boolean isRelevant(String docName, QualityQuery query) { + QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID()); + return qrj!=null && qrj.isRelevant(docName); + } + + /** single Judgement of a trec quality query */ + private static class QRelJudgement { + private String queryID; + private HashMap relevantDocs; + + QRelJudgement(String queryID) { + this.queryID = queryID; + relevantDocs = new HashMap(); + } + + public void addRelevandDoc(String docName) { + relevantDocs.put(docName,docName); + } + + boolean isRelevant(String docName) { + return relevantDocs.containsKey(docName); + } + + public int maxRecall() { + return relevantDocs.size(); + } + } + + // inherit javadocs + public boolean validateData(QualityQuery[] qq, PrintWriter logger) { + HashMap missingQueries = (HashMap) judgements.clone(); + ArrayList missingJudgements = new ArrayList(); + for (int i=0; i
docName is relevant for the given quality query.
+ * @param docName name of doc tested for relevancy.
+ * @param query tested quality query.
+ * @return true if relevant, false if not.
+ */
+ public boolean isRelevant(String docName, QualityQuery query);
+
+ /**
+  * Check that the given quality queries and this Judge match each other.
+  * A perfect match means this Judge has some data for each and every input
+  * quality query, and has no data on any other quality query.
+  * Note: an imperfect match does not fail the quality benchmark run,
+  * only a warning message is logged.
+  * @param qq quality queries to be validated.
+  * @param logger if not null, validation issues are logged.
+  * @return true if perfectly valid, false if not.
+  */
+ public boolean validateData(QualityQuery[] qq, PrintWriter logger);
+
+ /**
+ * Return the maximal recall for the input quality query.
+ * It is the number of relevant docs this Judge "knows" for the query.
+ * @param query the query whose maximal recall is needed.
+ * @return the number of relevant docs this Judge "knows" for the query.
+ */
+ public int maxRecall (QualityQuery query);
+
+}
Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
___________________________________________________________________
Name: svn:executable
+ *
Name: svn:eol-style
+ native
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java (revision 0)
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.util.Map;
+
+/**
+ * A QualityQuery has an ID and some name-value pairs.
+ * + * The ID allows to map the quality query with its judgements. + *
+ * The name-value pairs are used by a + * {@link org.apache.lucene.benchmark.quality.QualityQueryParser} + * to create a Lucene {@link org.apache.lucene.search.Query}. + *
+ * It is very likely that name-value-pairs would be mapped into fields in a Lucene query, + * but it is up to the QualityQueryParser how to map - e.g. all values in a single field, + * or each pair as its own field, etc., - and this of course must match the way the + * searched index was constructed. + */ +public class QualityQuery implements Comparable { + private String queryID; + private Map nameValPairs; + + /** + * Create a QualityQuery with given ID and name-value pairs. + * @param queryID ID of this quality query. + * @param nameValPairs the contents of this quality query. + */ + public QualityQuery(String queryID, Map nameValPairs) { + this.queryID = queryID; + this.nameValPairs = nameValPairs; + } + + /** + * Return all the names of name-value-pairs in this QualityQuery. + */ + public String[] getNames() { + return (String[]) nameValPairs.keySet().toArray(new String[0]); + } + + /** + * Return the value of a certain name-value pair. + * @param name the name whose value should be returned. + */ + public String getValue(String name) { + return (String) nameValPairs.get(name); + } + + /** + * Return the ID of this query. + * The ID allows to map the quality query with its judgements. + */ + public String getQueryID() { + return queryID; + } + + /* for a nicer sort of input queries before running them. + * Try first as ints, fall back to string if not int. 
*/ + public int compareTo(Object o) { + QualityQuery other = (QualityQuery) o; + try { + // compare as ints when ids ints + int n = Integer.parseInt(queryID); + int nOther = Integer.parseInt(other.queryID); + return n - nOther; + } catch (NumberFormatException e) { + // fall back to string comparison + return queryID.compareTo(other.queryID); + } + } + +} Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java ___________________________________________________________________ Name: svn:executable + * Name: svn:eol-style + native Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html (revision 0) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html (revision 0) @@ -0,0 +1,65 @@ + +
++This package allows benchmarking the search quality of a Lucene application. +
+In order to use this package you should provide: +
+For benchmarking TREC collections with TREC QRels, take a look at the +trec package. +
+Here is sample code that runs the TREC 2006 queries 701-850 on the .Gov2 collection: + +
+ File topicsFile = new File("topics-701-850.txt");
+ File qrelsFile = new File("qrels-701-850.txt");
+ Searcher searcher = new IndexSearcher("index");
+
+ int maxResults = 1000;
+ String docNameField = "docname";
+
+ PrintWriter logger = new PrintWriter(System.out,true);
+
+ // use trec utilities to read trec topics into quality queries
+ TrecTopicsReader qReader = new TrecTopicsReader();
+ QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+
+ // prepare judge, with trec utilities that read from a QRels file
+ Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+
+ // validate topics & judgments match each other
+ judge.validateData(qqs, logger);
+
+ // set the parsing of quality queries into Lucene queries.
+ QualityQueryParser qqParser = new SimpleQQParser("title", "body");
+
+ // run the benchmark
+ QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+ SubmissionReport submitLog = null;
+ QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+
+ // print an average sum of the results
+ QualityStats avg = QualityStats.average(stats);
+ avg.log("SUMMARY",2,logger, " ");
+
+
++Some immediate ways to modify this program to your needs are: +