Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java	(revision 1304315)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java	(working copy)
@@ -99,5 +99,6 @@
       }    
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
   }
 }
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java	(revision 1304315)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java	(working copy)
@@ -110,6 +110,7 @@
       }    
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
     
     Analyzer b = new Analyzer() {
       @Override
@@ -119,5 +120,6 @@
       }    
     };
     checkRandomData(random, b, 10000*RANDOM_MULTIPLIER);
+    checkRandomData(random, b, 200*RANDOM_MULTIPLIER, 8192);
   }
 }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java	(revision 1304315)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java	(working copy)
@@ -34,11 +34,11 @@
 
   private int minGram, maxGram;
   private int gramSize;
-  private int pos = 0;
+  private int pos;
   private int inLen; // length of the input AFTER trim()
   private int charsRead; // length of the input
   private String inStr;
-  private boolean started = false;
+  private boolean started;
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@@ -99,7 +99,7 @@
 
   /** Returns the next token in the stream, or null at EOS. */
   @Override
-  public final boolean incrementToken() throws IOException {
+  public boolean incrementToken() throws IOException {
     clearAttributes();
     if (!started) {
       started = true;
@@ -115,6 +115,20 @@
         charsRead += inc;
       }
       inStr = new String(chars, 0, charsRead).trim();  // remove any trailing empty strings 
+
+      if (charsRead == chars.length) {
+        // Read extra throwaway chars so that on end() we
+        // report the correct offset:
+        char[] throwaway = new char[1024];
+        while(true) {
+          final int inc = input.read(throwaway, 0, throwaway.length);
+          if (inc == -1) {
+            break;
+          }
+          charsRead += inc;
+        }
+      }
+
       inLen = inStr.length();
       if (inLen == 0) {
         return false;
@@ -138,22 +152,16 @@
   }
   
   @Override
-  public final void end() {
+  public void end() {
     // set final offset
     final int finalOffset = correctOffset(charsRead);
     this.offsetAtt.setOffset(finalOffset, finalOffset);
   }    
   
   @Override
-  public void reset(Reader input) throws IOException {
-    super.reset(input);
-  }
-
-  @Override
   public void reset() throws IOException {
     super.reset();
     started = false;
     pos = 0;
-    charsRead = 0;
   }
 }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java	(revision 1304315)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java	(working copy)
@@ -73,7 +73,7 @@
   private int maxGram;
   private int gramSize;
   private Side side;
-  private boolean started = false;
+  private boolean started;
   private int inLen; // length of the input AFTER trim()
   private int charsRead; // length of the input
   private String inStr;
@@ -178,7 +178,7 @@
 
   /** Returns the next token in the stream, or null at EOS. */
   @Override
-  public final boolean incrementToken() throws IOException {
+  public boolean incrementToken() throws IOException {
     clearAttributes();
     // if we are just starting, read the whole input
     if (!started) {
@@ -188,13 +188,28 @@
       charsRead = 0;
       // TODO: refactor to a shared readFully somewhere:
       while (charsRead < chars.length) {
-        int inc = input.read(chars, charsRead, chars.length-charsRead);
+        final int inc = input.read(chars, charsRead, chars.length-charsRead);
         if (inc == -1) {
           break;
         }
         charsRead += inc;
       }
+
       inStr = new String(chars, 0, charsRead).trim();  // remove any trailing empty strings 
+
+      if (charsRead == chars.length) {
+        // Read extra throwaway chars so that on end() we
+        // report the correct offset:
+        char[] throwaway = new char[1024];
+        while(true) {
+          final int inc = input.read(throwaway, 0, throwaway.length);
+          if (inc == -1) {
+            break;
+          }
+          charsRead += inc;
+        }
+      }
+
       inLen = inStr.length();
       if (inLen == 0) {
         return false;
@@ -221,21 +236,15 @@
   }
   
   @Override
-  public final void end() {
+  public void end() {
     // set final offset
     final int finalOffset = correctOffset(charsRead);
     this.offsetAtt.setOffset(finalOffset, finalOffset);
   }    
 
   @Override
-  public void reset(Reader input) throws IOException {
-    super.reset(input);
-  }
-
-  @Override
   public void reset() throws IOException {
     super.reset();
     started = false;
-    charsRead = 0;
   }
 }
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(revision 1304315)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(working copy)
@@ -33,6 +33,7 @@
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util._TestUtil;
 
 /** 
@@ -359,12 +360,22 @@
   }
 
   private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple) throws IOException {
+
+    final LineFileDocs docs = new LineFileDocs(random);
+
     for (int i = 0; i < iterations; i++) {
       String text;
-      if (simple) { 
-        text = random.nextBoolean() ? _TestUtil.randomSimpleString(random, maxWordLength) : _TestUtil.randomHtmlishString(random, maxWordLength);
+
+      if (random.nextInt(10) == 7) {
+        text = docs.nextDoc().get("body");
+        // Cap at maxWordLength chars, but never cut a surrogate pair in half:
+        final int cap = Math.min(text.length(), maxWordLength);
+        text = text.substring(0, (cap > 0 && cap < text.length() && Character.isHighSurrogate(text.charAt(cap - 1))) ? cap - 1 : cap);
       } else {
-        switch(_TestUtil.nextInt(random, 0, 4)) {
+        if (simple) { 
+          text = random.nextBoolean() ? _TestUtil.randomSimpleString(random, maxWordLength) : _TestUtil.randomHtmlishString(random, maxWordLength);
+        } else {
+          switch(_TestUtil.nextInt(random, 0, 4)) {
           case 0: 
             text = _TestUtil.randomSimpleString(random, maxWordLength);
             break;
@@ -376,6 +387,7 @@
             break;
           default:
             text = _TestUtil.randomUnicodeString(random, maxWordLength);
+          }
         }
       }
 
