Index: contrib/CHANGES.txt =================================================================== --- contrib/CHANGES.txt (revision 790543) +++ contrib/CHANGES.txt (working copy) @@ -35,6 +35,9 @@ 7. LUCENE-1576: Fix BrazilianAnalyzer to downcase tokens after StandardTokenizer so that stop words with mixed case are filtered out. (Rafael Cunha de Almeida, Douglas Campos via Mike McCandless) + + 8. LUCENE-1730: Fix TrecContentSource (benchmark) to use ISO-8859-1 when + reading the TREC files, instead of the platform default charset. (Shai Erera via ?) New features Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (revision 790543) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (working copy) @@ -182,7 +182,7 @@ } try { GZIPInputStream zis = new GZIPInputStream(new FileInputStream(f), 1 << 16); - reader = new BufferedReader(new InputStreamReader(zis), 1 << 16); + reader = new BufferedReader(new InputStreamReader(zis, "ISO-8859-1"), 1 << 16); return; } catch (Exception e) { retries++;