Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(revision 1311123)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(working copy)
@@ -18,17 +18,24 @@
  */
 
 import java.io.File;
+import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
 import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Modifier;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Enumeration;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
+import java.util.Set;
+import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -36,16 +43,40 @@
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;
 import org.apache.lucene.analysis.EmptyTokenizer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
+import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
+import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+import org.apache.lucene.analysis.hunspell.HunspellDictionary;
+import org.apache.lucene.analysis.hunspell.HunspellDictionaryTest;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.analysis.payloads.IdentityEncoder;
+import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.snowball.TestSnowball;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.util.CharArrayMap;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.tartarus.snowball.SnowballProgram;
+import org.xml.sax.InputSource;
 
 /** tests random analysis chains */
 public class TestRandomChains extends BaseTokenStreamTestCase {
@@ -70,6 +101,8 @@
           || c.equals(EmptyTokenizer.class)
           // doesn't actual reset itself!
           || c.equals(CachingTokenFilter.class)
+          // doesn't consume whole stream!
+          || c.equals(LimitTokenCountFilter.class)
           // broken!
           || c.equals(NGramTokenizer.class)
           // broken!
@@ -123,6 +156,8 @@
       Random random = new Random(seed);
       TokenizerSpec tokenizerspec = newTokenizer(random, reader);
       TokenFilterSpec filterspec = newFilterChain(random, tokenizerspec.tokenizer);
+      //System.out.println("seed=" + seed + ",tokenizerSpec=" + tokenizerspec.toString);
+      //System.out.println("seed=" + seed + ",tokenfilterSpec=" + filterspec.toString);
       return new TokenStreamComponents(tokenizerspec.tokenizer, filterspec.stream);
     }
 
@@ -130,6 +165,7 @@
     protected Reader initReader(Reader reader) {
       Random random = new Random(seed);
       CharFilterSpec charfilterspec = newCharFilterChain(random, reader);
+      //System.out.println("seed=" + seed + ",charFilterSpec=" + charfilterspec.toString);
       return charfilterspec.reader;
     }
 
@@ -156,23 +192,42 @@
     // create a new random tokenizer from classpath
     private TokenizerSpec newTokenizer(Random random, Reader reader) {
       TokenizerSpec spec = new TokenizerSpec();
+      Class<? extends Tokenizer> clazz = null; // out of the loop for debugging
       boolean success = false;
       while (!success) {
         try {
-          // TODO: check Reader+Version,Version+Reader too
-          // also look for other variants and handle them special
           int idx = random.nextInt(tokenizers.size());
-          try {
-            Constructor<? extends Tokenizer> c = tokenizers.get(idx).getConstructor(Version.class, Reader.class);
-            spec.tokenizer = c.newInstance(TEST_VERSION_CURRENT, reader);
-          } catch (NoSuchMethodException e) {
-            Constructor<? extends Tokenizer> c = tokenizers.get(idx).getConstructor(Reader.class);
-            spec.tokenizer = c.newInstance(reader);
+          clazz = tokenizers.get(idx);
+          Constructor<?>[] ctors = clazz.getConstructors();
+          Arrays.sort(ctors, ctorComp);
+          @SuppressWarnings("unchecked")
+          Constructor<? extends Tokenizer> ctor = (Constructor<? extends Tokenizer>) ctors[random.nextInt(ctors.length)];
+          if (ctor.isAnnotationPresent(Deprecated.class)) {
+            continue; // don't test deprecated ctors, they likely have known bugs
           }
-          spec.toString = tokenizers.get(idx).toString();
+          Object args[] = newTokenizerArgs(random, reader, ctor.getParameterTypes());
+          spec.tokenizer = ctor.newInstance(args);
+          spec.toString = clazz.toString() + ("(" + Arrays.toString(args) + ")");
           success = true;
         } catch (Exception e) {
-          // ignore
+          if (e instanceof UnsupportedOperationException) {
+            // ignore
+            System.err.println("WARNING: " + e + " for " + clazz);
+          } else if (e instanceof InvocationTargetException) {
+            if (e.getCause() instanceof IllegalArgumentException ||
+                e.getCause() instanceof UnsupportedOperationException) {
+              // thats ok
+              if (VERBOSE) {
+                System.err.println("Ignoring IAE/UOE from ctor:");
+                e.printStackTrace();
+              }
+            } else {
+              // not ok
+              throw new RuntimeException(e);
+            }
+          } else {
+            throw new RuntimeException(e);
+          }
         }
       }
       return spec;
@@ -183,27 +238,53 @@
       spec.reader = reader;
       StringBuilder descr = new StringBuilder();
       int numFilters = random.nextInt(3);
+      Class<? extends CharStream> clazz = null; // out of the loop for debugging
       for (int i = 0; i < numFilters; i++) {
         boolean success = false;
         while (!success) {
           try {
             // TODO: also look for other variants and handle them special
             int idx = random.nextInt(charfilters.size());
-            try {
-              Constructor<? extends CharStream> c = charfilters.get(idx).getConstructor(Reader.class);
-              spec.reader = c.newInstance(spec.reader);
-            } catch (NoSuchMethodException e) {
-              Constructor<? extends CharStream> c = charfilters.get(idx).getConstructor(CharStream.class);
-              spec.reader = c.newInstance(CharReader.get(spec.reader));
+            clazz = charfilters.get(idx);
+            Constructor<?>[] ctors = clazz.getConstructors();
+            if (ctors.length == 0) {
+              continue; // CharReader: has only a static get....
             }
+            Arrays.sort(ctors, ctorComp);
+            @SuppressWarnings("unchecked")
+            Constructor<? extends CharStream> ctor = (Constructor<? extends CharStream>) ctors[random.nextInt(ctors.length)];
+            if (ctor.isAnnotationPresent(Deprecated.class)) {
+              continue; // don't test deprecated ctors, they likely have known bugs
+            }
+            Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
+            spec.reader = ctor.newInstance(args);
 
             if (descr.length() > 0) {
               descr.append(",");
             }
-            descr.append(charfilters.get(idx).toString());
+            descr.append(clazz.toString());
+            descr.append("(" + Arrays.toString(args) + ")");
             success = true;
           } catch (Exception e) {
-            // ignore
+            if (e instanceof UnsupportedOperationException) {
+              // ignore
+              System.err.println("WARNING: " + e + " for " + clazz);
+            } else if (e instanceof InvocationTargetException) {
+              if (e.getCause() instanceof IllegalArgumentException ||
+                  e.getCause() instanceof UnsupportedOperationException) {
+                // thats ok
+                if (VERBOSE) {
+                  System.err.println("Ignoring IAE/UOE from ctor:");
+                  e.printStackTrace();
+                }
+              } else {
+                // not ok
+                System.err.println(clazz);
+                throw new RuntimeException(e);
+              }
+            } else {
+              throw new RuntimeException(e);
+            }
           }
         }
       }
@@ -216,32 +297,274 @@
       spec.stream = tokenizer;
       StringBuilder descr = new StringBuilder();
       int numFilters = random.nextInt(5);
+      Class<? extends TokenFilter> clazz = null; // out of the loop for debugging
       for (int i = 0; i < numFilters; i++) {
         boolean success = false;
         while (!success) {
           try {
-            // TODO: also look for other variants and handle them special
             int idx = random.nextInt(tokenfilters.size());
-            try {
-              Constructor<? extends TokenFilter> c = tokenfilters.get(idx).getConstructor(Version.class, TokenStream.class);
-              spec.stream = c.newInstance(TEST_VERSION_CURRENT, spec.stream);
-            } catch (NoSuchMethodException e) {
-              Constructor<? extends TokenFilter> c = tokenfilters.get(idx).getConstructor(TokenStream.class);
-              spec.stream = c.newInstance(spec.stream);
+            clazz = tokenfilters.get(idx);
+            Constructor<?>[] ctors = clazz.getConstructors();
+            Arrays.sort(ctors, ctorComp);
+            @SuppressWarnings("unchecked")
+            Constructor<? extends TokenFilter> ctor = (Constructor<? extends TokenFilter>) ctors[random.nextInt(ctors.length)];
+            if (ctor.isAnnotationPresent(Deprecated.class)) {
+              continue; // don't test deprecated ctors, they likely have known bugs
             }
+            Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
+            spec.stream = ctor.newInstance(args);
             if (descr.length() > 0) {
               descr.append(",");
             }
-            descr.append(tokenfilters.get(idx).toString());
+            descr.append(clazz.toString());
+            descr.append("(" + Arrays.toString(args) + ")");
             success = true;
           } catch (Exception e) {
-            // ignore
+            if (e instanceof UnsupportedOperationException) {
+              // ignore
+              System.err.println("WARNING: " + e + " for " + clazz);
+            } else if (e instanceof InvocationTargetException) {
+              if (e.getCause() instanceof IllegalArgumentException ||
+                  e.getCause() instanceof UnsupportedOperationException) {
+                // thats ok
+                if (VERBOSE) {
+                  System.err.println("Ignoring IAE/UOE from ctor:");
+                  e.printStackTrace();
+                }
+              } else {
+                // not ok
+                throw new RuntimeException(e);
+              }
+            } else {
+              throw new RuntimeException(e);
+            }
           }
         }
       }
       spec.toString = descr.toString();
       return spec;
     }
+    
+    private Object[] newTokenizerArgs(Random random, Reader reader, Class<?>[] paramTypes) {
+      Object[] args = new Object[paramTypes.length];
+      for (int i = 0; i < args.length; i++) {
+        Class<?> paramType = paramTypes[i];
+        if (paramType.equals(Reader.class)) {
+          args[i] = reader;
+        } else if (paramType.equals(AttributeFactory.class)) {
+          // TODO: maybe the collator one...???
+          args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
+        } else if (paramType.equals(AttributeSource.class)) {
+          args[i] = null; // this always gives IAE: fine 
+        } else {
+          args[i] = randomArg(random, paramType);
+        }
+      }
+      return args;
+    }
+    
+    private Object[] newCharFilterArgs(Random random, Reader reader, Class<?>[] paramTypes) {
+      Object[] args = new Object[paramTypes.length];
+      for (int i = 0; i < args.length; i++) {
+        Class<?> paramType = paramTypes[i];
+        if (paramType.equals(Reader.class)) {
+          args[i] = reader;
+        } else if (paramType.equals(CharStream.class)) {
+          args[i] = CharReader.get(reader);
+        } else {
+          args[i] = randomArg(random, paramType);
+        }
+      }
+      return args;
+    }
+    
+    private Object[] newFilterArgs(Random random, TokenStream stream, Class<?>[] paramTypes) {
+      Object[] args = new Object[paramTypes.length];
+      for (int i = 0; i < args.length; i++) {
+        Class<?> paramType = paramTypes[i];
+        if (paramType.equals(TokenStream.class)) {
+          args[i] = stream;
+        } else if (paramType.equals(CommonGramsFilter.class)) {
+          // CommonGramsQueryFilter takes this one explicitly
+          args[i] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, randomCharArraySet(random));
+        } else {
+          args[i] = randomArg(random, paramType);
+        }
+      }
+      return args;
+    }
+    
+    // TODO: we should sometimes pass null for some params?
+    private Object randomArg(Random random, Class<?> paramType)  {
+      if (paramType.equals(int.class)) {
+        // TODO: could cause huge ram usage to use full int range for some filters
+        // (e.g. allocate enormous arrays)
+        // return Integer.valueOf(random.nextInt());
+        return Integer.valueOf(_TestUtil.nextInt(random, -100, 100));
+      } else if (paramType.equals(char.class)) {
+        return Character.valueOf((char)random.nextInt(65536));
+      } else if (paramType.equals(float.class)) {
+        return Float.valueOf(random.nextFloat());
+      } else if (paramType.equals(boolean.class)) {
+        return Boolean.valueOf(random.nextBoolean());
+      } else if (paramType.equals(byte.class)) {
+        byte bytes[] = new byte[1];
+        random.nextBytes(bytes);
+        return Byte.valueOf(bytes[0]);
+      } else if (paramType.equals(byte[].class)) {
+        byte bytes[] = new byte[random.nextInt(256)];
+        random.nextBytes(bytes);
+        return bytes;
+      } else if (paramType.equals(Random.class)) {
+        return new Random(random.nextLong());
+      } else if (paramType.equals(Version.class)) {
+        // we expect bugs in emulating old versions
+        return TEST_VERSION_CURRENT;
+      } else if (paramType.equals(Set.class)) {
+        // TypeTokenFilter
+        Set<String> set = new HashSet<String>();
+        int num = random.nextInt(5);
+        for (int i = 0; i < num; i++) {
+          set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]);
+        }
+        return set;
+      } else if (paramType.equals(Collection.class)) {
+        // CapitalizationFilter
+        Collection<char[]> col = new ArrayList<char[]>();
+        int num = random.nextInt(5);
+        for (int i = 0; i < num; i++) {
+          col.add(_TestUtil.randomSimpleString(random).toCharArray());
+        }
+        return col;
+      } else if (paramType.equals(CharArraySet.class)) {
+        return randomCharArraySet(random);
+      } else if (paramType.equals(Pattern.class)) {
+        // TODO: don't want to make the exponentially slow ones Dawid documents
+        // in TestPatternReplaceFilter, so don't use truly random patterns (for now)
+        return Pattern.compile("a");
+      } else if (paramType.equals(PayloadEncoder.class)) {
+        return new IdentityEncoder(); // the other encoders will throw exceptions if tokens aren't numbers?
+      } else if (paramType.equals(HunspellDictionary.class)) {
+        // TODO: make nastier
+        InputStream affixStream = HunspellDictionaryTest.class.getResourceAsStream("test.aff");
+        InputStream dictStream = HunspellDictionaryTest.class.getResourceAsStream("test.dic");
+        try {
+         return new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT);
+        } catch (Exception ex) {
+          throw new RuntimeException(ex);
+        }
+      } else if (paramType.equals(HyphenationTree.class)) {
+        // TODO: make nastier
+        try {
+          InputSource is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm());
+          HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+          return hyphenator;
+        } catch (Exception ex) {
+          throw new RuntimeException(ex);
+        }
+      } else if (paramType.equals(SnowballProgram.class)) {
+        try {
+          String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
+          Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
+          return clazz.newInstance();
+        } catch (Exception ex) {
+          throw new RuntimeException(ex);
+        }
+      } else if (paramType.equals(String.class)) {
+        // TODO: make nastier
+        if (random.nextBoolean()) {
+          // a token type
+          return StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)];
+        } else {
+          return _TestUtil.randomSimpleString(random);
+        }
+      } else if (paramType.equals(NormalizeCharMap.class)) {
+        NormalizeCharMap map = new NormalizeCharMap();
+        // we can't add duplicate keys, or NormalizeCharMap gets angry
+        Set<String> keys = new HashSet<String>();
+        int num = random.nextInt(5);
+        for (int i = 0; i < num; i++) {
+          String key = _TestUtil.randomSimpleString(random);
+          if (!keys.contains(key)) {
+            map.add(key,_TestUtil.randomSimpleString(random));
+            keys.add(key);
+          }
+        }
+        return map;
+      } else if (paramType.equals(CharacterRunAutomaton.class)) {
+        // TODO: could probably use a purely random automaton
+        switch(random.nextInt(5)) {
+          case 0: return MockTokenizer.KEYWORD;
+          case 1: return MockTokenizer.SIMPLE;
+          case 2: return MockTokenizer.WHITESPACE;
+          case 3: return MockTokenFilter.EMPTY_STOPSET;
+          default: return MockTokenFilter.ENGLISH_STOPSET;
+        }
+      } else if (paramType.equals(CharArrayMap.class)) {
+        return randomCharArrayMap(random);
+      } else if (paramType.equals(SynonymMap.class)) {
+        return randomSynonymMap(random);
+      } else {
+        throw new UnsupportedOperationException("Don't know how to make a random " + paramType);
+      }
+    }
+    
+    private CharArraySet randomCharArraySet(Random random) {
+      int num = random.nextInt(10);
+      CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, num, random.nextBoolean());
+      for (int i = 0; i < num; i++) {
+        // TODO: make nastier
+        set.add(_TestUtil.randomSimpleString(random));
+      }
+      return set;
+    }
+    
+    private CharArrayMap<String> randomCharArrayMap(Random random) {
+      int num = random.nextInt(10);
+      CharArrayMap<String> map = new CharArrayMap<String>(TEST_VERSION_CURRENT, num, random.nextBoolean());
+      for (int i = 0; i < num; i++) {
+        // TODO: make nastier
+        map.put(_TestUtil.randomSimpleString(random), _TestUtil.randomSimpleString(random));
+      }
+      return map;
+    }
+    
+    // unapologetically stolen from TestSynonymMapFilter
+    private SynonymMap randomSynonymMap(Random random) {
+      SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
+      final int numEntries = atLeast(10);
+      for (int j = 0; j < numEntries; j++) {
+        addSyn(b, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
+      }
+      try {
+        return b.build();
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+    
+    private void addSyn(SynonymMap.Builder b, String input, String output, boolean keepOrig) {
+      b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
+            new CharsRef(output.replaceAll(" +", "\u0000")),
+            keepOrig);
+    }
+    
+    private String randomNonEmptyString(Random random) {
+      while(true) {
+        final String s = _TestUtil.randomUnicodeString(random).trim();
+        if (s.length() != 0 && s.indexOf('\u0000') == -1) {
+          return s;
+        }
+      }
+    }
+    
+    // TODO: better comparator?
+    final Comparator<Constructor<?>> ctorComp = new Comparator<Constructor<?>>() {
+      @Override
+      public int compare(Constructor<?> arg0, Constructor<?> arg1) {
+        return arg0.toGenericString().compareTo(arg1.toGenericString());
+      }
+    };
   }
   
   static class TokenizerSpec {
@@ -263,7 +586,7 @@
     int numIterations = atLeast(20);
     for (int i = 0; i < numIterations; i++) {
       MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
-      if (VERBOSE) {
+      if (VERBOSE) {
         System.out.println("Creating random analyzer:" + a);
       }
       try {
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java	(revision 1311123)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java	(working copy)
@@ -142,14 +142,16 @@
     }
   }
   
+  /** Snowball stemmer language names; for testing purposes ONLY */
+  public static final String[] SNOWBALL_LANGS = {
+    "Armenian", "Basque", "Catalan", "Danish", "Dutch", "English",
+    "Finnish", "French", "German2", "German", "Hungarian", "Irish",
+    "Italian", "Kp", "Lovins", "Norwegian", "Porter", "Portuguese",
+    "Romanian", "Russian", "Spanish", "Swedish", "Turkish"
+  };
+  
   public void testEmptyTerm() throws IOException {
-    String langs[] = { 
-        "Armenian", "Basque", "Catalan", "Danish", "Dutch", "English",
-        "Finnish", "French", "German2", "German", "Hungarian", "Irish",
-        "Italian", "Kp", "Lovins", "Norwegian", "Porter", "Portuguese",
-        "Romanian", "Russian", "Spanish", "Swedish", "Turkish"
-    };
-    for (final String lang : langs) {
+    for (final String lang : SNOWBALL_LANGS) {
       Analyzer a = new Analyzer() {
         @Override
         protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java	(revision 1311123)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java	(working copy)
@@ -71,6 +71,10 @@
     this.group = group;
     fillBuffer(str, input);
     matcher = pattern.matcher(str);
+    // confusingly group count depends ENTIRELY on the pattern but is only accessible via matcher
+    if (group >= 0 && group > matcher.groupCount()) {
+      throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
+    }
     index = 0;
   }
 
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java	(revision 1311123)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java	(working copy)
@@ -57,6 +57,9 @@
    */
   public PositionFilter(final TokenStream input, final int positionIncrement) {
     super(input);
+    if (positionIncrement < 0) {
+      throw new IllegalArgumentException("positionIncrement may not be negative");
+    }
     this.positionIncrement = positionIncrement;
   }
 
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java	(revision 1311123)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java	(working copy)
@@ -43,16 +43,25 @@
 
   public KeywordTokenizer(Reader input, int bufferSize) {
     super(input);
+    if (bufferSize <= 0) {
+      throw new IllegalArgumentException("bufferSize must be > 0");
+    }
     termAtt.resizeBuffer(bufferSize);
   }
 
   public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) {
     super(source, input);
+    if (bufferSize <= 0) {
+      throw new IllegalArgumentException("bufferSize must be > 0");
+    }
     termAtt.resizeBuffer(bufferSize);
   }
 
   public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
     super(factory, input);
+    if (bufferSize <= 0) {
+      throw new IllegalArgumentException("bufferSize must be > 0");
+    }
     termAtt.resizeBuffer(bufferSize);
   }
   
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java	(revision 1311123)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java	(working copy)
@@ -67,7 +67,7 @@
         Class.forName("org.tartarus.snowball.ext." + name + "Stemmer").asSubclass(SnowballProgram.class);
       stemmer = stemClass.newInstance();
     } catch (Exception e) {
-      throw new RuntimeException(e.toString());
+      throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
     }
   }
 
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java	(revision 1311123)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java	(working copy)
@@ -77,6 +77,13 @@
 
   public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
     super(input);
+    if (bufferSize < 0) {
+      throw new IllegalArgumentException("bufferSize cannot be negative");
+    }
+    if (skip < 0) {
+      // nocommit: not quite right here: see line 84... if skip > numTokensFound we always get a NegativeArraySizeException? needs fixing!
+      throw new IllegalArgumentException("skip cannot be negative");
+    }
     termAtt.resizeBuffer(bufferSize);
     this.delimiter = delimiter;
     this.replacement = replacement;
@@ -137,7 +144,11 @@
       }
       resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
       resultToken.setLength(0);
-      endPosition = delimiterPositions.get(delimitersCount-1 - skip);
+      int idx = delimitersCount-1 - skip;
+      if (idx >= 0) {
+        // otherwise it's ok, because we will skip and return false
+        endPosition = delimiterPositions.get(idx);
+      }
       finalOffset = correctOffset(length);
       posAtt.setPositionIncrement(1);
     }
@@ -163,10 +174,11 @@
   }
 
   @Override
-  public void reset(Reader input) throws IOException {
-    super.reset(input);
+  public void reset() throws IOException {
+    super.reset();
     resultToken.setLength(0);
     finalOffset = 0;
+    endPosition = 0;
     skipped = 0;
     delimitersCount = -1;
     delimiterPositions.clear();
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java	(revision 1311123)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java	(working copy)
@@ -65,6 +65,12 @@
 
   public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
     super(input);
+    if (bufferSize < 0) {
+      throw new IllegalArgumentException("bufferSize cannot be negative");
+    }
+    if (skip < 0) {
+      throw new IllegalArgumentException("skip cannot be negative");
+    }
     termAtt.resizeBuffer(bufferSize);
 
     this.delimiter = delimiter;
@@ -85,10 +91,11 @@
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
   private int startPosition = 0;
-  private int finalOffset = 0;
   private int skipped = 0;
   private boolean endDelimiter = false;
   private StringBuilder resultToken;
+  
+  private int charsRead = 0;
 
 
   @Override
@@ -112,12 +119,13 @@
 
     while (true) {
       int c = input.read();
-      if( c < 0 ){
+      if (c >= 0) {
+        charsRead++;
+      } else {
         if( skipped > skip ) {
           length += resultToken.length();
           termAtt.setLength(length);
-          finalOffset = correctOffset(startPosition + length);
-          offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
+           offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
           if( added ){
             resultToken.setLength(0);
             resultToken.append(termAtt.buffer(), 0, length);
@@ -125,7 +133,6 @@
           return added;
         }
         else{
-          finalOffset = correctOffset(startPosition + length);
           return false;
         }
       }
@@ -168,8 +175,7 @@
     }
     length += resultToken.length();
     termAtt.setLength(length);
-    finalOffset = correctOffset(startPosition + length);
-    offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
+    offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition+length));
     resultToken.setLength(0);
     resultToken.append(termAtt.buffer(), 0, length);
     return true;
@@ -178,14 +184,15 @@
   @Override
   public final void end() {
     // set final offset
+    int finalOffset = correctOffset(charsRead);
     offsetAtt.setOffset(finalOffset, finalOffset);
   }
 
   @Override
-  public void reset(Reader input) throws IOException {
-    super.reset(input);
+  public void reset() throws IOException {
+    super.reset();
     resultToken.setLength(0);
-    finalOffset = 0;
+    charsRead = 0;
     endDelimiter = false;
     skipped = 0;
   }
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java	(revision 1311123)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java	(working copy)
@@ -55,7 +55,7 @@
       makeString("with"))));
   
   private final CharacterRunAutomaton filter;
-  private boolean enablePositionIncrements = false;
+  private boolean enablePositionIncrements = true;
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -67,14 +67,15 @@
    * @param filter DFA representing the terms that should be removed.
-   * @param enablePositionIncrements true if the removal should accumulate position increments.
    */
-  public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
+  public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter) {
     super(input);
     this.filter = filter;
-    this.enablePositionIncrements = enablePositionIncrements;
   }
   
   @Override
   public boolean incrementToken() throws IOException {
+    // TODO: fix me when posInc=false, to work like FilteringTokenFilter in that case and not return
+    // initial token with posInc=0 ever
+    
     // return the first non-stop word found
     int skippedPositions = 0;
     while (input.incrementToken()) {
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java	(revision 1311123)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java	(working copy)
@@ -34,7 +34,9 @@
     // TODO: instead of fixed remainder... maybe a fixed
     // random seed?
     this.remainder = remainder;
-    assert remainder >= 0 && remainder < 10 : "invalid parameter";
+    if (remainder < 0 || remainder >= 10) {
+      throw new IllegalArgumentException("invalid remainder parameter (must be 0..9): " + remainder);
+    }
   }
   
   // for testing only, uses a remainder of 0
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java	(revision 1311123)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java	(working copy)
@@ -76,7 +76,7 @@
    * MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false}).
    */
   public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false);
+    this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, true);
   }
 
   /** 
@@ -93,7 +93,8 @@
   public TokenStreamComponents createComponents(String fieldName, Reader reader) {
     MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase, maxTokenLength);
     tokenizer.setEnableChecks(enableChecks);
-    TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
+    MockTokenFilter filt = new MockTokenFilter(tokenizer, filter);
+    filt.setEnablePositionIncrements(enablePositionIncrements);
     return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
   }
   
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java	(revision 1311123)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java	(working copy)
@@ -34,6 +34,9 @@
 
   public MockFixedLengthPayloadFilter(Random random, TokenStream in, int length) {
     super(in);
+    if (length < 0) {
+      throw new IllegalArgumentException("length must be >= 0");
+    }
     this.random = random;
     this.bytes = new byte[length];
     this.payload = new Payload(bytes);
