Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java (revision 1488980)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java (working copy)
@@ -31,15 +31,15 @@
*
* <top>
* <num> Number: nnn
- *
+ *
* <title> title of the topic
- *
+ *
* <desc> Description:
* description of the topic
- *
+ *
* <narr> Narrative:
* "story" composed by assessors.
- *
+ *
* </top>
*
* Comment lines starting with '#' are ignored.
@@ -47,7 +47,7 @@
public class TrecTopicsReader {
private static final String newline = System.getProperty("line.separator");
-
+
/**
* Constructor for Trec's TopicsReader
*/
@@ -74,12 +74,22 @@
// title
sb = read(reader,"",null,true,false);
k = sb.indexOf(">");
- String title = sb.substring(k+1).trim();
- // description
- read(reader,"",null,false,false);
+ String titleFristLine = sb.substring(k+1).trim();
sb.setLength(0);
+ sb.append(titleFristLine);
+ // The title may span multiple lines.
String line = null;
while ((line = reader.readLine()) != null) {
+ if (line.startsWith(""))
+ break;
+ if (sb.length() > 0) sb.append(' ');
+ sb.append(line);
+ }
+ String title = sb.toString().trim();
+
+ // description
+ sb.setLength(0);
+ while ((line = reader.readLine()) != null) {
if (line.startsWith(""))
break;
if (sb.length() > 0) sb.append(' ');
@@ -132,7 +142,6 @@
sep = newline;
}
}
- //System.out.println("read: "+sb);
return sb;
}
}
Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt
===================================================================
--- lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt (revision 1488980)
+++ lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt (working copy)
@@ -274,7 +274,8 @@
Number: 19
- 20 while common week
+
+20 while common week
Description:
Topic 19 Description Line 1