+ * <top> + * <num> Number: nnn + * + * <title> title of the topic + * + * <desc> Description: + * description of the topic + * + * <narr> Narrative: + * "story" composed by assessors. + * + * </top> + *+ * Comment lines starting with '#' are ignored. + */ +public class TopicsReader implements QualityQueriesReader { + + private static final String newline = System.getProperty("line.separator"); + + /** + * Constructor for Trec's TopicsReader + */ + public TopicsReader() { + super(); + } + + /* (non-Javadoc) + * @see org.apache.lucene.benchmark.quality.utils.QualityQueriesReader#readQueries(java.io.BufferedReader) + */ + public QualityQuery[] readQueries(BufferedReader reader) throws IOException { + ArrayList res = new ArrayList(); + StringBuffer sb; + try { + while (null!=(sb=read(reader,"
+ * Expected input format: + *
+ * qnum 0 doc-name is-relevant + *+ * Two sample lines: + *
+ * 19 0 doc303 1 + * 19 0 doc7295 0 + *+ * @param reader + * @throws IOException + */ + public JudgeTrec (BufferedReader reader) throws IOException { + judgements = new HashMap(); + QRelJudgement curr = null; + String zero = "0"; + String line; + + try { + while (null!=(line=reader.readLine())) { + line = line.trim(); + if (line.length()==0 || '#'==line.charAt(0)) { + continue; + } + StringTokenizer st = new StringTokenizer(line); + String queryID = st.nextToken(); + st.nextToken(); + String docName = st.nextToken(); + boolean relevant = !zero.equals(st.nextToken()); + assert !st.hasMoreTokens() : "wrong format: "+line+" next: "+st.nextToken(); + if (relevant) { // only keep relevant docs + if (curr==null || !curr.queryID.equals(queryID)) { + curr = (QRelJudgement)judgements.get(queryID); + if (curr==null) { + curr = new QRelJudgement(queryID); + judgements.put(queryID,curr); + } + } + curr.addRelevandDoc(docName); + } + } + } finally { + reader.close(); + } + } + + public boolean isRelevant(String docName, QualityQuery query) { + QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID()); + return qrj!=null && qrj.isRelevant(docName); + } + + /** single Judgement of a trec quality query */ + private static class QRelJudgement { + private String queryID; + private HashMap relevantDocs; + + QRelJudgement(String queryID) { + this.queryID = queryID; + relevantDocs = new HashMap(); + } + + public void addRelevandDoc(String docName) { + relevantDocs.put(docName,docName); + } + + boolean isRelevant(String docName) { + return relevantDocs.containsKey(docName); + } + + public int maxRecall() { + return relevantDocs.size(); + } + } + + public boolean validateData(QualityQuery[] qq, PrintWriter logger) { + HashMap missingQueries = (HashMap) judgements.clone(); + ArrayList missingJudgements = new ArrayList(); + for (int i=0; i
/**
 * A quality query: a query ID together with a map of named fields.
 * <p>
 * The ID allows mapping the quality query to its judgements.
 * The fields are named parts of the quality query representation.
 * <p>
 * Borrowing from TREC notions, possible field names may be "title",
 * "description", etc., but anything goes, and so a quality query does
 * not have to be a TREC query.
 */
public class QualityQuery implements Comparable {
  private String queryID;
  private Map queryFields;

  /**
   * Creates a quality query.
   *
   * @param queryID     ID of this query, returned unchanged by {@link #getQueryID()}.
   * @param queryFields map from field name to field value; the map is stored
   *                    by reference, it is not copied.
   */
  public QualityQuery(String queryID, Map queryFields) {
    this.queryID = queryID;
    this.queryFields = queryFields;
  }

  /**
   * Returns the names of all fields of this query, i.e. the keys of the
   * field map, in the iteration order of the map's key set.
   */
  public String[] getFieldNames() {
    return (String[]) queryFields.keySet().toArray(new String[0]);
  }

  /**
   * Returns the value stored for the given field name, as returned by the
   * underlying map's <code>get</code>.
   *
   * @param fieldName name of the requested field.
   */
  public String getField(String fieldName) {
    return (String) queryFields.get(fieldName);
  }

  /** Returns the ID of this query. */
  public String getQueryID() {
    return queryID;
  }

  /**
   * Orders queries by ID, for a nicer sort of input queries before running them.
   * When both IDs parse as ints they are compared numerically; otherwise the
   * IDs are compared as strings.
   *
   * @param o the other query; must be a QualityQuery.
   * @return a negative value, zero, or a positive value as this query's ID is
   *         less than, equal to, or greater than the other query's ID.
   * @throws ClassCastException if <code>o</code> is not a QualityQuery.
   */
  public int compareTo(Object o) {
    QualityQuery other = (QualityQuery) o;
    try {
      // compare as ints when both ids are ints
      int n = Integer.parseInt(queryID);
      int nOther = Integer.parseInt(other.queryID);
      // Explicit three-way comparison: the subtraction n - nOther overflows
      // when the ids are far apart and would then report the wrong sign.
      if (n < nOther) {
        return -1;
      }
      return n == nOther ? 0 : 1;
    } catch (NumberFormatException e) {
      // at least one id is not an int: fall back to string comparison
      return queryID.compareTo(other.queryID);
    }
  }

}

/*
 * Patch metadata that followed the class on the original line, kept as-is
 * apart from line breaks: svn property changes for QualityQuery.java and the
 * beginning of the diff for BasicDocMaker.java.
 *
 * Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
 * ___________________________________________________________________
 * Name: svn:executable
 *    + *
 * Name: svn:eol-style
 *    + native
 *
 * Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java
 * ===================================================================
 * --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (revision 551794)
 * +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java (working copy)
 * @@ -22,6 +22,9 @@
 *  import org.apache.lucene.document.DateTools;
 *  import org.apache.lucene.document.Document;
 *  import org.apache.lucene.document.Field;
 * +import org.apache.lucene.document.Field.Index;
 * +import org.apache.lucene.document.Field.Store;
 * +import org.apache.lucene.document.Field.TermVector;
 *
 *  import java.io.File;
 */
import java.io.UnsupportedEncodingException; @@ -97,10 +100,10 @@ private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException { int docid = incrNumDocsCreated(); Document doc = new Document(); - doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal)); + doc.add(new Field("docid", "doc"+docid, Store.YES, Index.NO, TermVector.NO)); if (docData.getName()!=null) { String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt); - doc.add(new Field("docname", name, storeVal, indexVal, termVecVal)); + doc.add(new Field("docname", name, Store.YES, Index.NO, TermVector.NO)); } if (docData.getDate()!=null) { String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);