Index: contrib/CHANGES.txt
===================================================================
--- contrib/CHANGES.txt	(revision 790848)
+++ contrib/CHANGES.txt	(working copy)
@@ -35,6 +35,11 @@
  7. LUCENE-1576: Fix BrazilianAnalyzer to downcase tokens after
     StandardTokenizer so that stop words with mixed case are filtered
     out.  (Rafael Cunha de Almeida, Douglas Campos via Mike McCandless)
+    
+ 8. LUCENE-1730: Fix TrecContentSource (benchmark) to use ISO-8859-1 when
+    reading the TREC files, unless a different encoding is specified.
+    Additionally, ContentSource now supports a content.source.encoding parameter
+    in the configuration file. (Shai Erera via ?)
 
 New features
 
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java	(revision 790848)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java	(working copy)
@@ -34,9 +34,9 @@
 /**
  * Represents content from a specified source, such as TREC, Reuters etc. A
  * {@link ContentSource} is responsible for creating {@link DocData} objects for
- * its documents to be consumed by {@link ToDeleteDocMaker}. It also keeps track of
- * various statistics, such as how many documents were generated, size in bytes
- * etc.
+ * its documents to be consumed by {@link ToDeleteDocMaker}. It also keeps track
+ * of various statistics, such as how many documents were generated, size in
+ * bytes etc.
  * <p>
  * Supports the following configuration parameters:
  * <ul>
@@ -44,6 +44,9 @@
  * forever (<b>default=true</b>).
  * <li><b>content.source.verbose</b> - specifies whether messages should be
  * output by the content source (<b>default=false</b>).
+ * <li><b>content.source.encoding</b> - specifies which encoding to use when
+ * reading the files of this content source. Implementations may define their
+ * own default if this parameter is not specified (<b>default=null</b>).
  * <li><b>content.source.log.step</b> - specifies for how many documents a
  * message should be logged. If set to 0 it means no logging should occur.
  * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
@@ -71,6 +74,7 @@
   protected boolean forever;
   protected int logStep;
   protected boolean verbose;
+  protected String encoding;
   
   private CompressorStreamFactory csFactory = new CompressorStreamFactory();
 
@@ -196,6 +200,7 @@
     forever = config.get("content.source.forever", true);
     logStep = config.get("content.source.log.step", 0);
     verbose = config.get("content.source.verbose", false);
+    encoding = config.get("content.source.encoding", null);
   }
 
 }
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java	(revision 790848)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java	(working copy)
@@ -39,6 +39,7 @@
  * Config properties:
  * <ul>
  * <li>docs.file=&lt;path to the file&gt;
+ * <li>content.source.encoding - encoding used to read docs.file; defaults to UTF-8.
  * </ul>
  */
 public class LineDocSource extends ContentSource {
@@ -54,7 +55,7 @@
         reader.close();
       }
       InputStream is = getInputStream(file);
-      reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE);
+      reader = new BufferedReader(new InputStreamReader(is, encoding), BUFFER_SIZE);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
@@ -111,6 +112,9 @@
       throw new IllegalArgumentException("docs.file must be set");
     }
     file = new File(fileName).getAbsoluteFile();
+    if (encoding == null) {
+      encoding = "UTF-8";
+    }
   }
 
 }
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java	(revision 790848)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java	(working copy)
@@ -47,10 +47,11 @@
  * (<b>default=trec</b>).
  * <li><b>html.parser</b> - specifies the {@link HTMLParser} class to use for
  * parsing the TREC documents content (<b>default=DemoHTMLParser</b>).
+ * <li><b>content.source.encoding</b> - if not specified, ISO-8859-1 is used.
  * </ul>
  */
 public class TrecContentSource extends ContentSource {
-  // TODO (3.0): change StringBuffer to StringBuffer
+  // TODO (3.0): change StringBuffer to StringBuilder
 
   private static final class DateFormatInfo {
     DateFormat[] dfs;
@@ -181,8 +182,8 @@
         System.out.println("opening: " + f + " length: " + f.length());
       }
       try {
-        GZIPInputStream zis = new GZIPInputStream(new FileInputStream(f), 1 << 16);
-        reader = new BufferedReader(new InputStreamReader(zis), 1 << 16);
+        GZIPInputStream zis = new GZIPInputStream(new FileInputStream(f), BUFFER_SIZE);
+        reader = new BufferedReader(new InputStreamReader(zis, encoding), BUFFER_SIZE);
         return;
       } catch (Exception e) {
         retries++;
@@ -334,6 +335,9 @@
       // Should not get here. Throw runtime exception.
       throw new RuntimeException(e);
     }
+    if (encoding == null) {
+      encoding = "ISO-8859-1";
+    }
   }
 
 }
Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java
===================================================================
--- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java	(revision 790848)
+++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java	(working copy)
@@ -40,7 +40,7 @@
       this.forever = forever;
     }
     
-    protected void openNextFile() throws NoMoreDataException, IOException {
+    void openNextFile() throws NoMoreDataException, IOException {
       if (reader != null) {
         if (!forever) {
           throw new NoMoreDataException();
