Index: modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java	(revision 1307543)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java	(working copy)
@@ -33,6 +33,8 @@
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.util.CharsRef;
@@ -430,6 +432,57 @@
     }
   }
   
+  // Adds MockGraphTokenFilter before SynFilter:
+  public void testRandom2GraphBefore() throws Exception {
+    final int numIters = atLeast(10);
+    for (int i = 0; i < numIters; i++) {
+      b = new SynonymMap.Builder(random.nextBoolean());
+      final int numEntries = atLeast(10);
+      for (int j = 0; j < numEntries; j++) {
+        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
+      }
+      final SynonymMap map = b.build();
+      final boolean ignoreCase = random.nextBoolean();
+      
+      final Analyzer analyzer = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+          TokenStream graph = new MockGraphTokenFilter(random, tokenizer);
+          return new TokenStreamComponents(tokenizer, new SynonymFilter(graph, map, ignoreCase));
+        }
+      };
+
+      checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
+    }
+  }
+
+  // Adds MockGraphTokenFilter after SynFilter:
+  public void testRandom2GraphAfter() throws Exception {
+    final int numIters = atLeast(10);
+    for (int i = 0; i < numIters; i++) {
+      b = new SynonymMap.Builder(random.nextBoolean());
+      final int numEntries = atLeast(10);
+      for (int j = 0; j < numEntries; j++) {
+        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
+      }
+      final SynonymMap map = b.build();
+      final boolean ignoreCase = random.nextBoolean();
+      
+      final Analyzer analyzer = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+          TokenStream syns = new SynonymFilter(tokenizer, map, ignoreCase);
+          TokenStream graph = new MockGraphTokenFilter(random, syns);
+          return new TokenStreamComponents(tokenizer, graph);
+        }
+      };
+
+      checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
+    }
+  }
+  
   public void testEmptyTerm() throws IOException {
     final int numIters = atLeast(10);
     for (int i = 0; i < numIters; i++) {
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java	(revision 1307543)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java	(working copy)
@@ -1,17 +1,5 @@
 package org.apache.lucene.analysis.core;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.util.Version;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Arrays;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -29,6 +17,20 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockGraphTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+
 public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   
   public void testHugeDoc() throws IOException {
@@ -247,4 +249,18 @@
   public void testRandomHugeStrings() throws Exception {
     checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
   }
+
+  // Adds MockGraphTokenFilter after StandardTokenizer:
+  public void testRandomHugeStringsGraphAfter() throws Exception {
+    checkRandomData(random,
+                    new Analyzer() {
+                      @Override
+                      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+                        Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+                        TokenStream tokenStream = new MockGraphTokenFilter(random, tokenizer);
+                        return new TokenStreamComponents(tokenizer, tokenStream);
+                      }
+                    },
+                    200*RANDOM_MULTIPLIER, 8192);
+  }
 }
Index: modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
===================================================================
--- modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java	(revision 1307543)
+++ modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java	(working copy)
@@ -26,6 +26,7 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
@@ -190,7 +191,21 @@
     checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
     checkRandomData(random, analyzerNoPunct, 200*RANDOM_MULTIPLIER, 8192);
   }
-  
+
+  public void testRandomHugeStringsMockGraphAfter() throws Exception {
+    // Randomly inject graph tokens after JapaneseTokenizer:
+    checkRandomData(random,
+                    new Analyzer() {
+                      @Override
+                      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+                        Tokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
+                        TokenStream graph = new MockGraphTokenFilter(random, tokenizer);
+                        return new TokenStreamComponents(tokenizer, graph);
+                      }
+                    },
+                    200*RANDOM_MULTIPLIER, 8192);
+  }
+
   public void testLargeDocReliability() throws Exception {
     for (int i = 0; i < 100; i++) {
       String s = _TestUtil.randomUnicodeString(random, 10000);
Index: lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java	(revision 0)
+++ lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java	(working copy)
@@ -0,0 +1,391 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
+
+public class TestGraphTokenizers extends BaseTokenStreamTestCase {
+
+  // Makes a graph TokenStream from the string; separate
+  // positions with single space, multiple tokens at the same
+  // position with /, and add optional position length with
+  // :.  EG "a b c" is a simple chain, "a/x b c" adds 'x'
+  // over 'a' at position 0 with posLen=1, "a/x:3 b c" adds
+  // 'x' over a with posLen=3.  Tokens are in normal-form!
+  // So, offsets are computed based on the first token at a
+  // given position.  NOTE: each token must be a single
+  // character!  We assume this when computing offsets...
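+  // For example, "a/x:3 b c" produces: 'a' at pos=0 with
+  // startOffset=0, endOffset=1, posLen=1; 'x' also at pos=0
+  // with startOffset=0, endOffset=5, posLen=3; 'b' at pos=1
+  // with offsets 2-3; and 'c' at pos=2 with offsets 4-5
+  // (positions are spaced two offset units apart, and a
+  // token's endOffset is startOffset + 2*posLen - 1).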
+  
+  // NOTE: all input tokens must be length 1!!!  This means
+  // you cannot turn on MockCharFilter when random
+  // testing...
+
+  private static class GraphTokenizer extends Tokenizer {
+    private List<Token> tokens;
+    private int upto;
+    private int inputLength;
+
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+    private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
+
+    public GraphTokenizer(Reader input) {
+      super(input);
+    }
+
+    @Override
+    public void reset() {
+      tokens = null;
+      upto = 0;
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      if (tokens == null) {
+        fillTokens();
+      }
+      //System.out.println("graphTokenizer: incr upto=" + upto + " vs " + tokens.size());
+      if (upto == tokens.size()) {
+        //System.out.println("  END @ " + tokens.size());
+        return false;
+      } 
+      final Token t = tokens.get(upto++);
+      //System.out.println("  return token=" + t);
+      clearAttributes();
+      termAtt.append(t.toString());
+      offsetAtt.setOffset(t.startOffset(), t.endOffset());
+      posIncrAtt.setPositionIncrement(t.getPositionIncrement());
+      posLengthAtt.setPositionLength(t.getPositionLength());
+      return true;
+    }
+
+    @Override
+    public void end() throws IOException {
+      super.end();
+      // NOTE: somewhat... hackish, but we need this to
+      // satisfy BaseTokenStreamTestCase's final-offset checks:
+      final int lastOffset;
+      if (tokens != null && !tokens.isEmpty()) {
+        lastOffset = tokens.get(tokens.size()-1).endOffset();
+      } else {
+        lastOffset = 0;
+      }
+      offsetAtt.setOffset(correctOffset(lastOffset),
+                          correctOffset(inputLength));
+    }
+
+    private void fillTokens() throws IOException {
+      final StringBuilder sb = new StringBuilder();
+      final char[] buffer = new char[256];
+      while (true) {
+        final int count = input.read(buffer);
+        if (count == -1) {
+          break;
+        }
+        sb.append(buffer, 0, count);
+        //System.out.println("got count=" + count);
+      }
+      //System.out.println("fillTokens: " + sb);
+
+      inputLength = sb.length();
+
+      final String[] parts = sb.toString().split(" ");
+
+      tokens = new ArrayList<Token>();
+      int pos = 0;
+      int maxPos = -1;
+      int offset = 0;
+      //System.out.println("again");
+      for(String part : parts) {
+        final String[] overlapped = part.split("/");
+        boolean firstAtPos = true;
+        int minPosLength = Integer.MAX_VALUE;
+        for(String part2 : overlapped) {
+          final int colonIndex = part2.indexOf(':');
+          final String token;
+          final int posLength;
+          if (colonIndex != -1) {
+            token = part2.substring(0, colonIndex);
+            posLength = Integer.parseInt(part2.substring(1+colonIndex));
+          } else {
+            token = part2;
+            posLength = 1;
+          }
+          maxPos = Math.max(maxPos, pos + posLength);
+          minPosLength = Math.min(minPosLength, posLength);
+          final Token t = new Token(token, offset, offset + 2*posLength - 1);
+          t.setPositionLength(posLength);
+          t.setPositionIncrement(firstAtPos ? 1:0);
+          firstAtPos = false;
+          //System.out.println("  add token=" + t + " startOff=" + t.startOffset() + " endOff=" + t.endOffset());
+          tokens.add(t);
+        }
+        pos += minPosLength;
+        offset = 2 * pos;
+      }
+      assert maxPos <= pos: "input string mal-formed: posLength>1 tokens hang over the end";
+    }
+  }
+
+  public void testMockGraphTokenFilterBasic() throws Exception {
+
+    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            final TokenStream t2 = new MockGraphTokenFilter(random, t);
+            return new TokenStreamComponents(t, t2);
+          }
+        };
+      
+      checkAnalysisConsistency(random, a, false, "a b c d e f g h i j k");
+    }
+  }
+
+  public void testMockGraphTokenFilterOnGraphInput() throws Exception {
+    for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new GraphTokenizer(reader);
+            final TokenStream t2 = new MockGraphTokenFilter(random, t);
+            return new TokenStreamComponents(t, t2);
+          }
+        };
+      
+      checkAnalysisConsistency(random, a, false, "a/x:3 c/y:2 d e f/z:4 g h i j k");
+    }
+  }
+
+  // Just deletes (leaving hole) token 'a':
+  private final static class RemoveATokens extends TokenFilter {
+    private int pendingPosInc;
+
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+
+    public RemoveATokens(TokenStream in) {
+      super(in);
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      pendingPosInc = 0;
+    }
+
+    @Override
+    public void end() throws IOException {
+      super.end();
+      posIncAtt.setPositionIncrement(pendingPosInc + posIncAtt.getPositionIncrement());
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      while (true) {
+        final boolean gotOne = input.incrementToken();
+        if (!gotOne) {
+          return false;
+        } else if (termAtt.toString().equals("a")) {
+          pendingPosInc += posIncAtt.getPositionIncrement();
+        } else {
+          posIncAtt.setPositionIncrement(pendingPosInc + posIncAtt.getPositionIncrement());
+          pendingPosInc = 0;
+          return true;
+        }
+      }
+    }
+  }
+
+  public void testMockGraphTokenFilterBeforeHoles() throws Exception {
+    for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            final TokenStream t2 = new MockGraphTokenFilter(random, t);
+            final TokenStream t3 = new RemoveATokens(t2);
+            return new TokenStreamComponents(t, t3);
+          }
+        };
+
+      checkAnalysisConsistency(random, a, false, "a b c d e f g h i j k");
+      checkAnalysisConsistency(random, a, false, "x y a b c d e f g h i j k");
+      checkAnalysisConsistency(random, a, false, "a b c d e f g h i j k a");
+      checkAnalysisConsistency(random, a, false, "a b c d e f g h i j k a x y");
+    }
+  }
+
+  public void testMockGraphTokenFilterAfterHoles() throws Exception {
+    for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            final TokenStream t2 = new RemoveATokens(t);
+            final TokenStream t3 = new MockGraphTokenFilter(random, t2);
+            return new TokenStreamComponents(t, t3);
+          }
+        };
+
+      checkAnalysisConsistency(random, a, false, "a b c d e f g h i j k");
+      checkAnalysisConsistency(random, a, false, "x y a b c d e f g h i j k");
+      checkAnalysisConsistency(random, a, false, "a b c d e f g h i j k a");
+      checkAnalysisConsistency(random, a, false, "a b c d e f g h i j k a x y");
+    }
+  }
+
+  public void testMockGraphTokenFilterRandom() throws Exception {
+    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            final TokenStream t2 = new MockGraphTokenFilter(random, t);
+            return new TokenStreamComponents(t, t2);
+          }
+        };
+      
+      checkRandomData(random, a, 5, atLeast(1000));
+    }
+  }
+
+  // Two MockGraphTokenFilters
+  public void testDoubleMockGraphTokenFilterRandom() throws Exception {
+    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            final TokenStream t1 = new MockGraphTokenFilter(random, t);
+            final TokenStream t2 = new MockGraphTokenFilter(random, t1);
+            return new TokenStreamComponents(t, t2);
+          }
+        };
+      
+      checkRandomData(random, a, 5, atLeast(1000));
+    }
+  }
+
+  public void testMockGraphTokenFilterBeforeHolesRandom() throws Exception {
+    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            final TokenStream t1 = new MockGraphTokenFilter(random, t);
+            final TokenStream t2 = new MockHoleInjectingTokenFilter(random, t1);
+            return new TokenStreamComponents(t, t2);
+          }
+        };
+      
+      checkRandomData(random, a, 5, atLeast(1000));
+    }
+  }
+
+  public void testMockGraphTokenFilterAfterHolesRandom() throws Exception {
+    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter);
+      }
+
+      // Make new analyzer each time, because MGTF has fixed
+      // seed:
+      final Analyzer a = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            final Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            final TokenStream t1 = new MockHoleInjectingTokenFilter(random, t);
+            final TokenStream t2 = new MockGraphTokenFilter(random, t1);
+            return new TokenStreamComponents(t, t2);
+          }
+        };
+      
+      checkRandomData(random, a, 5, atLeast(1000));
+    }
+  }
+}

Property changes on: lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java	(revision 1307543)
+++ lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java	(working copy)
@@ -59,6 +59,7 @@
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         return new TokenStreamComponents(new Tokenizer(reader) {
+          // TODO: use CannedTokenStream
           private final String[] TOKENS = {"1", "2", "3", "4", "5"};
           private final int[] INCREMENTS = {0, 2, 1, 0, 1};
           private int i = 0;
Index: lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java	(revision 1307543)
+++ lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java	(working copy)
@@ -40,6 +40,10 @@
   /** Set the starting and ending offset.
     @see #startOffset() and #endOffset()*/
   public void setOffset(int startOffset, int endOffset) {
+    // TODO: check that these are valid!  IE, each should be
+    // >= 0, and endOffset should be >= startOffset.
+    // Problem is this could "break" existing
+    // tokenizers/filters.
     this.startOffset = startOffset;
     this.endOffset = endOffset;
   }
Index: lucene/core/src/java/org/apache/lucene/analysis/Token.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/analysis/Token.java	(revision 1307543)
+++ lucene/core/src/java/org/apache/lucene/analysis/Token.java	(working copy)
@@ -22,6 +22,7 @@
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
@@ -121,13 +122,14 @@
 */
 public class Token extends CharTermAttributeImpl 
                    implements TypeAttribute, PositionIncrementAttribute,
-                              FlagsAttribute, OffsetAttribute, PayloadAttribute {
+                              FlagsAttribute, OffsetAttribute, PayloadAttribute, PositionLengthAttribute {
 
   private int startOffset,endOffset;
   private String type = DEFAULT_TYPE;
   private int flags;
   private Payload payload;
   private int positionIncrement = 1;
+  private int positionLength = 1;
 
   /** Constructs a Token will null text. */
   public Token() {
@@ -270,6 +272,20 @@
     return positionIncrement;
   }
 
+  /** Set the position length.
+   * @see PositionLengthAttribute */
+  @Override
+  public void setPositionLength(int positionLength) {
+    this.positionLength = positionLength;
+  }
+
+  /** Get the position length.
+   * @see PositionLengthAttribute */
+  @Override
+  public int getPositionLength() {
+    return positionLength;
+  }
+
   /** Returns this Token's starting offset, the position of the first character
     corresponding to this token in the source text.
 
Index: lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java	(revision 0)
+++ lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java	(working copy)
@@ -0,0 +1,134 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit need standalone test here:
+
+// TODO: cutover kuromoji to this too
+
+/** Acts like forever growing T[], but internally uses a
+ *  circular buffer to reuse instances of T.
+ * 
+ *  @lucene.internal */
+public abstract class RollingBuffer<T extends RollingBuffer.Resettable> {
+
+  public static interface Resettable {
+    public void reset();
+  }
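+
+  // A minimal usage sketch (MyPos is a hypothetical Resettable
+  // implementation, not part of this class):
+  //
+  //   RollingBuffer<MyPos> buf = new RollingBuffer<MyPos>() {
+  //     @Override
+  //     protected MyPos newInstance() {
+  //       return new MyPos();
+  //     }
+  //   };
+  //   MyPos p = buf.get(pos);    // grows/wraps as needed
+  //   ...
+  //   buf.freeBefore(pos);       // recycle instances before pos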
+
+  @SuppressWarnings("unchecked") private T[] buffer = (T[]) new RollingBuffer.Resettable[8];
+
+  // Next array index to write to:
+  private int nextWrite;
+
+  // Next position to write:
+  private int nextPos;
+
+  // How many valid instances are held in the buffer:
+  private int count;
+
+  public RollingBuffer() {
+    for(int idx=0;idx<buffer.length;idx++) {
+      buffer[idx] = newInstance();
+    }
+  }
+
+  protected abstract T newInstance();
+
+  public void reset() {
+    nextWrite--;
+    while (count > 0) {
+      if (nextWrite == -1) {
+        nextWrite = buffer.length - 1;
+      }
+      buffer[nextWrite--].reset();
+      count--;
+    }
+    nextWrite = 0;
+    nextPos = 0;
+    count = 0;
+  }
+
+  // For assert:
+  private boolean inBounds(int pos) {
+    return pos < nextPos && pos >= nextPos - count;
+  }
+
+  private int getIndex(int pos) {
+    int index = nextWrite - (nextPos - pos);
+    if (index < 0) {
+      index += buffer.length;
+    }
+    return index;
+  }
+
+  /** Get T instance for this absolute position;
+   *  this is allowed to be arbitrarily far "in the
+   *  future" but cannot be before the last freeBefore. */
+  public T get(int pos) {
+    //System.out.println("RA.get pos=" + pos + " nextPos=" + nextPos + " nextWrite=" + nextWrite + " count=" + count);
+    while (pos >= nextPos) {
+      if (count == buffer.length) {
+        @SuppressWarnings("unchecked") T[] newBuffer = (T[]) new Resettable[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+        //System.out.println("  grow length=" + newBuffer.length);
+        System.arraycopy(buffer, nextWrite, newBuffer, 0, buffer.length-nextWrite);
+        System.arraycopy(buffer, 0, newBuffer, buffer.length-nextWrite, nextWrite);
+        for(int i=buffer.length;i<newBuffer.length;i++) {
+          newBuffer[i] = newInstance();
+        }
+        nextWrite = buffer.length;
+        buffer = newBuffer;
+      }
+      if (nextWrite == buffer.length) {
+        nextWrite = 0;
+      }
+      // Should have already been reset:
+      // nocommit can we make subclass check this...?
+      //assert buffer[nextWrite].count == 0;
+      // init(buffer[nextWrite], pos);
+      nextWrite++;
+      nextPos++;
+      count++;
+    }
+    assert inBounds(pos);
+    final int index = getIndex(pos);
+    //System.out.println("  pos=" + pos + " nextPos=" + nextPos + " -> index=" + index);
+    //assert buffer[index].pos == pos;
+    return buffer[index];
+  }
+
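+  /** Frees (recycles) all instances before the given absolute
+   *  position; those positions must not be passed to {@link
+   *  #get} again. */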
+  public void freeBefore(int pos) {
+    final int toFree = count - (nextPos - pos);
+    assert toFree >= 0;
+    assert toFree <= count;
+    int index = nextWrite - count;
+    if (index < 0) {
+      index += buffer.length;
+    }
+    for(int i=0;i<toFree;i++) {
+      if (index == buffer.length) {
+        index = 0;
+      }
+      //System.out.println("  fb idx=" + index);
+      buffer[index].reset();
+      index++;
+    }
+    count -= toFree;
+  }
+}

Property changes on: lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java	(revision 0)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java	(working copy)
@@ -0,0 +1,62 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util._TestUtil;
+
+// Randomly injects holes (position gaps) into the token stream:
+public final class MockHoleInjectingTokenFilter extends TokenFilter {
+
+  private final long randomSeed;
+  private Random random;
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+
+  public MockHoleInjectingTokenFilter(Random random, TokenStream in) {
+    super(in);
+    randomSeed = random.nextLong();
+  }
+  
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    random = new Random(randomSeed);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      final int posInc = posIncAtt.getPositionIncrement();
+      if (posInc > 0 && random.nextInt(5) == 3) {
+        posIncAtt.setPositionIncrement(posInc + _TestUtil.nextInt(random, 1, 5));
+        // TODO: should we tweak offsets...?
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // TODO: end?
+}
+
+

Property changes on: lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java	(revision 1307543)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java	(working copy)
@@ -23,6 +23,7 @@
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 
 /**
  * TokenStream from a canned list of Tokens.
@@ -32,10 +33,11 @@
   private int upto = 0;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
   
-  public CannedTokenStream(Token[] tokens) {
+  public CannedTokenStream(Token... tokens) {
     this.tokens = tokens;
   }
   
@@ -49,6 +51,7 @@
       termAtt.setEmpty();
       termAtt.append(token.toString());
       posIncrAtt.setPositionIncrement(token.getPositionIncrement());
+      posLengthAtt.setPositionLength(token.getPositionLength());
       offsetAtt.setOffset(token.startOffset(), token.endOffset());
       payloadAtt.setPayload(token.getPayload());
       return true;
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java	(revision 0)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java	(working copy)
@@ -0,0 +1,338 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.RollingBuffer;
+import org.apache.lucene.util._TestUtil;
+
+// TODO: once we work out "graph normal form", make sure
+// this respects it:
+
+/** Randomly inserts overlapped (posInc=0) tokens with
+ *  posLength sometimes > 1.  The chain must have
+ *  an OffsetAttribute.  */
+
+public final class MockGraphTokenFilter extends TokenFilter {
+
+  private static final boolean DEBUG = false;
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  private final long seed;
+  private Random random;
+
+  // nocommit in theory we should not have to init to -1
+  // here... ie, caller must always call reset() first:
+  private int inputPos = -1;
+  private int outputPos;
+  private int lastOutputPos = -1;
+  private boolean end;
+
+  private final class Position implements RollingBuffer.Resettable {
+    final List<AttributeSource.State> states = new ArrayList<AttributeSource.State>();
+    int nextRead;
+
+    // Any token leaving from this position should have this startOffset:
+    int startOffset = -1;
+
+    // Any token arriving at this position should have this endOffset:
+    int endOffset = -1;
+
+    @Override
+    public void reset() {
+      states.clear();
+      nextRead = 0;
+      startOffset = -1;
+      endOffset = -1;
+    }
+
+    public void captureState() throws IOException {
+      assert startOffset == offsetAtt.startOffset();
+      states.add(MockGraphTokenFilter.this.captureState());
+    }
+  }
+
+  private final RollingBuffer<Position> positions = new RollingBuffer<Position>() {
+    @Override
+    protected Position newInstance() {
+      return new Position();
+    }
+  };
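+
+  // Overall approach: input tokens are buffered per position in
+  // the RollingBuffer above, recording the startOffset of tokens
+  // leaving each position and the endOffset of tokens arriving
+  // at it.  At random positions we inject a new token with a
+  // random posLength, reading ahead in the input (and buffering
+  // what we read) until the endOffset of the injected token's
+  // end position is known.  We never inject a token that starts
+  // or ends inside a hole.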
+
+  public MockGraphTokenFilter(Random random, TokenStream input) {
+    super(input);
+    seed = random.nextLong();
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    end = false;
+    positions.reset();
+    // NOTE: must be "deterministically random" because
+    // BaseTokenStreamTestCase pulls tokens twice on the
+    // same input and asserts they are the same:
+    this.random = new Random(seed);
+    inputPos = -1;
+    outputPos = 0;
+    lastOutputPos = -1;
+  }
+
+  // we never extend beyond end of input tokens:
+  /*
+  @Override
+  public void end() throws IOException {
+    super.end();
+    final int posInc = posIncAtt.getPositionIncrement();
+    // nocommit fixme: use Positions.getMaxIndex
+    if (posInc < pendingPositions) {
+      // NOTE: not used yet (see LUCENE-3849)
+      // Make sure pos ends to cover our longest graph output:
+      // nocommit must accumulate:
+      posIncAtt.setPositionIncrement(pendingPositions + posIncAtt.getPositionIncrement());
+    }
+  }
+  */
+
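+  // Result of pulling the next input token: SAME means the token
+  // is at the same position as the previous one (posInc == 0),
+  // NEXT means it advanced to a new position, END means there
+  // are no more input tokens.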
+  private enum TOKEN_POS {SAME, NEXT, END};
+
+  private TOKEN_POS nextInputToken() throws IOException {
+    assert !end;
+    if (DEBUG) {
+      System.out.println("  call input.incr");
+    }
+    final boolean result = input.incrementToken();
+    if (result) {
+      final int posInc = posIncAtt.getPositionIncrement();
+      final int posLength = posLengthAtt.getPositionLength();
+
+      // NOTE: when posLength > 1, we have a hole... we
+      // don't allow injected tokens to start or end
+      // "inside" a hole, so we don't need to make up
+      // offsets inside it
+
+      assert inputPos != -1 || posInc > 0;
+      inputPos += posInc;
+      if (DEBUG) {
+        System.out.println("    got token term=" + termAtt + " posLength=" + posLength + " posInc=" + posInc + " inputPos=" + inputPos);
+      }
+      final Position posData = positions.get(inputPos);
+      if (posInc == 0) {
+        assert posData.startOffset == offsetAtt.startOffset();
+      } else {
+        assert posData.startOffset == -1;
+        posData.startOffset = offsetAtt.startOffset();
+        if (DEBUG) {
+          System.out.println("    record startOffset[" + inputPos + "]=" + posData.startOffset);
+        }
+      }
+
+      final Position posEndData = positions.get(inputPos + posLength);
+      if (posEndData.endOffset == -1) {
+        // First time we are seeing a token that
+        // arrives to this position: record the
+        // endOffset
+        posEndData.endOffset = offsetAtt.endOffset();
+        if (DEBUG) {
+          System.out.println("    record endOffset[" + (inputPos+posLength) + "]=" + posEndData.endOffset);
+        }
+      } else {
+        // We've already seen a token arriving there;
+        // make sure its endOffset is the same (NOTE:
+        // some tokenizers, eg WDF, will fail
+        // this...):
+        assert posEndData.endOffset == offsetAtt.endOffset(): "posEndData.endOffset=" + posEndData.endOffset + " vs offsetAtt.endOffset()=" + offsetAtt.endOffset();
+      }
+      if (posInc == 0) {
+        return TOKEN_POS.SAME;
+      } else {
+        return TOKEN_POS.NEXT;
+      }
+    } else {
+      // nocommit can i call end() here...?
+      if (DEBUG) {
+        System.out.println("    got END");
+      }
+      return TOKEN_POS.END;
+    }
+  }
+
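+  // Called just before returning a token at outputPos: sets its
+  // position increment (the gap since the last output position)
+  // and frees buffered positions we have moved past: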
+  private void pushOutputPos() {
+    posIncAtt.setPositionIncrement(outputPos - lastOutputPos);
+    if (DEBUG) {
+      System.out.println("  pushOutputPos: set posInc=" + posIncAtt.getPositionIncrement());
+    }
+    lastOutputPos = outputPos;
+    positions.freeBefore(outputPos);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+
+    if (DEBUG) {
+      System.out.println("MockGraphTF.incr inputPos=" + inputPos + " outputPos=" + outputPos);
+    }
+
+    while (true) {
+      final Position posData = positions.get(outputPos);
+      if (posData.nextRead < posData.states.size()) {
+        // Serve up all buffered tokens from this position:
+        if (DEBUG) {
+          System.out.println("  restore buffered nextRead=" + posData.nextRead + " vs " + posData.states.size());
+        }
+        restoreState(posData.states.get(posData.nextRead++));
+        if (DEBUG) {
+          System.out.println("    term=" + termAtt + " outputPos=" + outputPos);
+        }
+        pushOutputPos();
+        return true;
+      }
+
+      boolean tokenPending = false;
+
+      final int prevInputPos = inputPos;
+
+      if (inputPos == -1 || inputPos == outputPos) {
+        // We've used up the buffered tokens; pull the next
+        // input token:
+        if (end) {
+          return false;
+        }
+        final TOKEN_POS result = nextInputToken();
+        if (result == TOKEN_POS.SAME) {
+          return true;
+        } else if (result == TOKEN_POS.NEXT) {
+          tokenPending = true;
+        } else {
+          // nocommit not needed?  we did "pass-through" on
+          // the end-ness?  consumer must not call incrToken
+          // again...
+          //end = true;
+          return false;
+        }
+      } else {
+        assert inputPos > outputPos;
+        if (DEBUG) {
+          System.out.println("  done @ outputPos=" + outputPos);
+        }
+      }
+
+      // We're done (above) serving up all tokens leaving
+      // from the same position; now maybe insert a token.
+      // Note that we may insert more than one token leaving
+      // from this position.  We only inject tokens at
+      // positions where we've seen at least one input token
+      // (ie, we cannot inject inside holes):
+
+      if (prevInputPos != -1  && positions.get(outputPos).startOffset != -1 && random.nextInt(7) == 5) {
+        if (DEBUG) {
+          System.out.println("  inject @ outputPos=" + outputPos);
+        }
+        /*
+        if (positions.get(outputPos).states.size() == 0 && !tokenPending) {
+          System.out.println("START IN HOLE");
+        } else {
+          System.out.println("START NOT IN HOLE");
+        }
+        */
+        if (tokenPending) {
+          positions.get(inputPos).captureState();
+        }
+        final int posLength = _TestUtil.nextInt(random, 1, 5);
+        final Position posEndData = positions.get(outputPos + posLength);
+
+        // Pull enough tokens until we discover what our
+        // endOffset should be:
+        while (!end && posEndData.endOffset == -1 && inputPos <= (outputPos + posLength)) {
+          if (DEBUG) {
+            System.out.println("  lookahead [endPos=" + (outputPos + posLength) + "]...");
+          }
+          final TOKEN_POS result = nextInputToken();
+          if (result != TOKEN_POS.END) {
+            positions.get(inputPos).captureState();
+          } else {
+            end = true;
+            if (DEBUG) {
+              System.out.println("    force end lookahead");
+            }
+            break;
+          }
+        }
+
+        // nocommit how come tokenizers don't send end state
+        // on incrementToken() returning false...?  why
+        // special end()...?
+
+        if (posEndData.endOffset != -1) {
+          assert posEndData.endOffset != -1;
+          clearAttributes();
+          posLengthAtt.setPositionLength(posLength);
+          termAtt.append(_TestUtil.randomUnicodeString(random));
+          pushOutputPos();
+          offsetAtt.setOffset(positions.get(outputPos).startOffset,
+                              positions.get(outputPos + posLength).endOffset);
+          if (DEBUG) {
+            System.out.println("  inject: outputPos=" + outputPos + " startOffset=" + offsetAtt.startOffset() +
+                               " endOffset=" + offsetAtt.endOffset() +
+                               " posLength=" + posLengthAtt.getPositionLength());
+          }
+          // TODO: set TypeAtt too?
+          return true;
+
+        } else {
+          // Either we hit the end of the tokens (ie, our
+          // attempted posLength is too long because it
+          // hangs out over the end), or our attempted
+          // posLength ended in the middle of a hole; just
+          // skip injecting in these cases.  We will still
+          // test these cases by having a StopFilter after
+          // MockGraphTokenFilter...
+        }
+
+      } else if (tokenPending) {
+        outputPos = inputPos;
+        if (DEBUG) {
+          System.out.println("  pass-through");
+        }
+        pushOutputPos();
+        return true;
+      } else {
+        // We are skipping over a hole (posInc > 1) from our input:
+        outputPos++;
+        if (DEBUG) {
+          System.out.println("  incr outputPos=" + outputPos);
+        }
+      }
+    }
+  }
+}

Property changes on: lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(revision 1307543)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(working copy)
@@ -138,7 +138,7 @@
       if (typeAtt != null) typeAtt.setType("bogusType");
       if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
       if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
-      
+
       checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
       assertTrue("token "+i+" does not exist", ts.incrementToken());
       assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
@@ -152,8 +152,18 @@
         assertEquals("type "+i, types[i], typeAtt.type());
       if (posIncrements != null)
         assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
-      if (posLengths != null)
-        assertEquals("posLength "+i, posLengths[i], posLengthAtt.getPositionLength());
+
+      if (posLengths != null) {
+        final int posLen = posLengthAtt.getPositionLength();
+        assertEquals("posLength "+i, posLengths[i], posLen);
+        // TODO: get this working!  But, this doesn't work
+        // correctly when posInc != 1 (eg, it won't handle
+        // a StopFilter that removes tokens that a syn filter
+        // had previously matched):
+        //if (endOffsets != null) {
+        //assertEquals("endOffsets "+i, endOffsets[i+posLen-1], offsetAtt.endOffset());
+        //}
+      }
       
       // we can enforce some basic things about a few attributes even if the caller doesn't check:
       if (offsetAtt != null) {
@@ -436,6 +446,12 @@
       }
     }
     ts.end();
+
+    // Make sure Tokenizer consumed entire text input:
+    if (offsetAtt != null) {
+      assertEquals("finalOffset ", text.length(), offsetAtt.endOffset());
+    }
+
     ts.close();
 
     // verify reusing is "reproducable" and also get the normal tokenstream sanity checks
@@ -445,6 +461,8 @@
       // even when input is length 0:
       if (text.length() != 0) {
 
+        // nocommit verify posLen>1 tokens have the right offsets:
+
         // (Optional) second pass: do something evil:
         final int evilness = random.nextInt(50);
         if (evilness == 17) {
