Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternCaptureGroupTokenFilter.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternCaptureGroupTokenFilter.java	(revision 0)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternCaptureGroupTokenFilter.java	(working copy)
@@ -0,0 +1,606 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.StringReader;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+public class TestPatternCaptureGroupTokenFilter extends BaseTokenStreamTestCase {
+
+  public void testNoPattern() throws Exception { // no patterns configured: tokens pass through unchanged
+    testPatterns(
+        "foobarbaz",
+        new String[] {}, // regexes
+        new String[] {"foobarbaz"}, // expected terms
+        new int[] {0}, // expected start offsets
+        new int[] {9}, // expected end offsets
+        new int[] {1}, // expected position increments
+        false // preserveOriginal
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        true
+    );
+  }
+
+  public void testNoMatch() throws Exception { // the pattern matches nothing: the original tokens are kept
+    testPatterns(
+        "foobarbaz",
+        new String[] {"xx"},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"xx"},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"xx"},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"xx"},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        true
+    );
+  }
+
+  public void testNoCapture() throws Exception { // the pattern matches but has no capture group: tokens are kept
+    testPatterns(
+        "foobarbaz",
+        new String[] {".."},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {".."},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {".."},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {".."},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        true
+    );
+  }
+
+  public void testEmptyCapture() throws Exception { // the group only ever captures "": empty captures are not emitted
+    testPatterns(
+        "foobarbaz",
+        new String[] {".(y*)"},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {".(y*)"},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {".(y*)"},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {".(y*)"},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        true
+    );
+  }
+
+  public void testCaptureAll() throws Exception { // the group spans the whole token: emitted once, never duplicated
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.+)"},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.+)"},
+        new String[] {"foobarbaz"},
+        new int[] {0},
+        new int[] {9},
+        new int[] {1},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.+)"},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.+)"},
+        new String[] {"foo","bar","baz"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        true
+    );
+  }
+
+  public void testCaptureStart() throws Exception { // capture the first character of each token
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^(.)"},
+        new String[] {"f"},
+        new int[] {0},
+        new int[] {1},
+        new int[] {1},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^(.)"},
+        new String[] {"foobarbaz","f"},
+        new int[] {0,0},
+        new int[] {9,1},
+        new int[] {1,0},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^(.)"},
+        new String[] {"f","b","b"},
+        new int[] {0,4,8},
+        new int[] {1,5,9},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^(.)"},
+        new String[] {"foo","f","bar","b","baz","b"},
+        new int[] {0,0,4,4,8,8},
+        new int[] {3,1,7,5,11,9},
+        new int[] {1,0,1,0,1,0},
+        true
+    );
+  }
+
+  public void testCaptureMiddle() throws Exception { // capture the second character of each token
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^.(.)."},
+        new String[] {"o"},
+        new int[] {1},
+        new int[] {2},
+        new int[] {1},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^.(.)."},
+        new String[] {"foobarbaz","o"},
+        new int[] {0,1},
+        new int[] {9,2},
+        new int[] {1,0},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^.(.)."},
+        new String[] {"o","a","a"},
+        new int[] {1,5,9},
+        new int[] {2,6,10},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^.(.)."},
+        new String[] {"foo","o","bar","a","baz","a"},
+        new int[] {0,1,4,5,8,9},
+        new int[] {3,2,7,6,11,10},
+        new int[] {1,0,1,0,1,0},
+        true
+    );
+  }
+
+  public void testCaptureEnd() throws Exception { // capture the last character of each token
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.)$"},
+        new String[] {"z"},
+        new int[] {8},
+        new int[] {9},
+        new int[] {1},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.)$"},
+        new String[] {"foobarbaz","z"},
+        new int[] {0,8},
+        new int[] {9,9},
+        new int[] {1,0},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.)$"},
+        new String[] {"o","r","z"},
+        new int[] {2,6,10},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.)$"},
+        new String[] {"foo","o","bar","r","baz","z"},
+        new int[] {0,2,4,6,8,10},
+        new int[] {3,3,7,7,11,11},
+        new int[] {1,0,1,0,1,0},
+        true
+    );
+  }
+
+  public void testCaptureStartMiddle() throws Exception { // capture the first and second characters of each token
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^(.)(.)"},
+        new String[] {"f","o"},
+        new int[] {0,1},
+        new int[] {1,2},
+        new int[] {1,0},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^(.)(.)"},
+        new String[] {"foobarbaz","f","o"},
+        new int[] {0,0,1},
+        new int[] {9,1,2},
+        new int[] {1,0,0},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^(.)(.)"},
+        new String[] {"f","o","b","a","b","a"},
+        new int[] {0,1,4,5,8,9},
+        new int[] {1,2,5,6,9,10},
+        new int[] {1,0,1,0,1,0},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^(.)(.)"},
+        new String[] {"foo","f","o","bar","b","a","baz","b","a"},
+        new int[] {0,0,1,4,4,5,8,8,9},
+        new int[] {3,1,2,7,5,6,11,9,10},
+        new int[] {1,0,0,1,0,0,1,0,0},
+        true
+    );
+  }
+
+  public void testCaptureStartEnd() throws Exception { // capture the first and last characters of each token
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^(.).+(.)$"},
+        new String[] {"f","z"},
+        new int[] {0,8},
+        new int[] {1,9},
+        new int[] {1,0},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"^(.).+(.)$"},
+        new String[] {"foobarbaz","f","z"},
+        new int[] {0,0,8},
+        new int[] {9,1,9},
+        new int[] {1,0,0},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^(.).+(.)$"},
+        new String[] {"f","o","b","r","b","z"},
+        new int[] {0,2,4,6,8,10},
+        new int[] {1,3,5,7,9,11},
+        new int[] {1,0,1,0,1,0},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"^(.).+(.)$"},
+        new String[] {"foo","f","o","bar","b","r","baz","b","z"},
+        new int[] {0,0,2,4,4,6,8,8,10},
+        new int[] {3,1,3,7,5,7,11,9,11},
+        new int[] {1,0,0,1,0,0,1,0,0},
+        true
+    );
+  }
+
+  public void testCaptureMiddleEnd() throws Exception { // capture the last two characters of each token
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.)(.)$"},
+        new String[] {"a","z"},
+        new int[] {7,8},
+        new int[] {8,9},
+        new int[] {1,0},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.)(.)$"},
+        new String[] {"foobarbaz","a","z"},
+        new int[] {0,7,8},
+        new int[] {9,8,9},
+        new int[] {1,0,0},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.)(.)$"},
+        new String[] {"o","o","a","r","a","z"},
+        new int[] {1,2,5,6,9,10},
+        new int[] {2,3,6,7,10,11},
+        new int[] {1,0,1,0,1,0},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.)(.)$"},
+        new String[] {"foo","o","o","bar","a","r","baz","a","z"},
+        new int[] {0,1,2,4,5,6,8,9,10},
+        new int[] {3,2,3,7,6,7,11,10,11},
+        new int[] {1,0,0,1,0,0,1,0,0},
+        true
+    );
+  }
+
+  public void testMultiCaptureOverlap() throws Exception { // nested groups, matched repeatedly: outer, middle, inner
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.(.(.)))"},
+        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
+        new int[] {0,1,2,3,4,5,6,7,8},
+        new int[] {3,3,3,6,6,6,9,9,9},
+        new int[] {1,0,0,0,0,0,0,0,0},
+        false
+    );
+    testPatterns(
+        "foobarbaz",
+        new String[] {"(.(.(.)))"},
+        new String[] {"foobarbaz","foo","oo","o","bar","ar","r","baz","az","z"},
+        new int[] {0,0,1,2,3,4,5,6,7,8},
+        new int[] {9,3,3,3,6,6,6,9,9,9},
+        new int[] {1,0,0,0,0,0,0,0,0,0},
+        true
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.(.(.)))"},
+        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
+        new int[] {0,1,2,4,5,6,8,9,10},
+        new int[] {3,3,3,7,7,7,11,11,11},
+        new int[] {1,0,0,1,0,0,1,0,0},
+        false
+    );
+
+    testPatterns(
+        "foo bar baz",
+        new String[] {"(.(.(.)))"},
+        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"}, // outer group == whole token, so it is not repeated
+        new int[] {0,1,2,4,5,6,8,9,10},
+        new int[] {3,3,3,7,7,7,11,11,11},
+        new int[] {1,0,0,1,0,0,1,0,0},
+        true
+    );
+  }
+
+  public void testMultiPattern() throws Exception { // several patterns at once: captures come out ordered by start offset
+    testPatterns(
+        "aaabbbaaa",
+        new String[] {"(aaa)","(bbb)","(ccc)"},
+        new String[] {"aaa","bbb","aaa"},
+        new int[] {0,3,6},
+        new int[] {3,6,9},
+        new int[] {1,0,0},
+        false
+    );
+    testPatterns(
+        "aaabbbaaa",
+        new String[] {"(aaa)","(bbb)","(ccc)"},
+        new String[] {"aaabbbaaa","aaa","bbb","aaa"},
+        new int[] {0,0,3,6},
+        new int[] {9,3,6,9},
+        new int[] {1,0,0,0},
+        true
+    );
+
+    testPatterns(
+        "aaa bbb aaa",
+        new String[] {"(aaa)","(bbb)","(ccc)"},
+        new String[] {"aaa","bbb","aaa"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        false
+    );
+
+    testPatterns(
+        "aaa bbb aaa",
+        new String[] {"(aaa)","(bbb)","(ccc)"},
+        new String[] {"aaa","bbb","aaa"},
+        new int[] {0,4,8},
+        new int[] {3,7,11},
+        new int[] {1,1,1},
+        true
+    );
+  }
+
+
+  public void testCamelCase() throws Exception { // the camelCase splitter patterns from the filter's class javadoc
+    testPatterns(
+        "letsPartyLIKEits1999_dude",
+        new String[] {
+            "([A-Z]{2,})",
+            "(?<![A-Z])([A-Z][a-z]+)",
+            "(?:^|\\b|(?<=[0-9_])|(?<=[A-Z]{2}))([a-z]+)",
+            "([0-9]+)"
+        },
+        new String[] {"lets","Party","LIKE","its","1999","dude"},
+        new int[] {0,4,9,13,16,21},
+        new int[] {4,9,13,16,20,25},
+        new int[] {1,0,0,0,0,0}, // one increment per expected token (6)
+        false
+    );
+    testPatterns(
+        "letsPartyLIKEits1999_dude",
+        new String[] {
+            "([A-Z]{2,})",
+            "(?<![A-Z])([A-Z][a-z]+)",
+            "(?:^|\\b|(?<=[0-9_])|(?<=[A-Z]{2}))([a-z]+)",
+            "([0-9]+)"
+        },
+        new String[] {"letsPartyLIKEits1999_dude","lets","Party","LIKE","its","1999","dude"},
+        new int[] {0,0,4,9,13,16,21},
+        new int[] {25,4,9,13,16,20,25},
+        new int[] {1,0,0,0,0,0,0}, // one increment per expected token (7)
+        true
+    );
+  }
+
+  private void testPatterns(String input, String[] regexes, String[] tokens,
+      int[] startOffsets, int[] endOffsets, int[] positions,
+      boolean preserveOriginal) throws Exception {
+    // Compile each regex, whitespace-tokenize the input, and compare the filter's output with the expectations.
+    final Pattern[] compiled = new Pattern[regexes.length];
+    for (int idx = 0; idx < compiled.length; idx++) {
+      compiled[idx] = Pattern.compile(regexes[idx]);
+    }
+    final MockTokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+    final TokenStream ts = new PatternCaptureGroupTokenFilter(tokenizer, preserveOriginal, compiled);
+    assertTokenStreamContents(ts, tokens, startOffsets, endOffsets, positions);
+  }
+
+}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(revision 1444706)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(working copy)
@@ -455,6 +455,12 @@
         return Pattern.compile("a");
       }
     });
+    
+    put(Pattern[].class, new ArgProducer() {
+      @Override public Object create(Random random) {
+        return new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")};
+      }
+    });
     put(PayloadEncoder.class, new ArgProducer() {
       @Override public Object create(Random random) {
         return new IdentityEncoder(); // the other encoders will throw exceptions if tokens arent numbers?
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java	(revision 1444706)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilterFactory.java	(working copy)
@@ -23,8 +23,6 @@
 
 import java.util.Map;
 import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
-
 /**
  * Factory for {@link PatternReplaceFilter}. 
  * <pre class="prettyprint" >
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternCaptureGroupFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternCaptureGroupFilterFactory.java	(revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternCaptureGroupFilterFactory.java	(working copy)
@@ -0,0 +1,52 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link PatternCaptureGroupTokenFilter}; <code>preserve_original</code> defaults to <code>true</code>.
+ * <pre class="prettyprint" >
+ * &lt;fieldType name="text_ptncapturegroup" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.PatternCaptureGroupFilterFactory" pattern="([^a-z])" preserve_original="true"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * @see PatternCaptureGroupTokenFilter
+ */
+public class PatternCaptureGroupFilterFactory extends TokenFilterFactory {
+  private Pattern pattern;
+  private boolean preserveOriginal = true;
+  
+  @Override
+  public void init(Map<String, String> args) {
+    super.init(args);
+    this.pattern = getPattern("pattern");
+    this.preserveOriginal = !args.containsKey("preserve_original") || Boolean.parseBoolean(args.get("preserve_original")); // absent => true
+  }
+  @Override
+  public PatternCaptureGroupTokenFilter create(TokenStream input) {
+    return new PatternCaptureGroupTokenFilter(input, this.preserveOriginal, this.pattern);
+  }
+}
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java	(revision 1444706)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java	(working copy)
@@ -38,7 +38,6 @@
  * @see Pattern
  */
 public final class PatternReplaceFilter extends TokenFilter {
-  private final Pattern p;
   private final String replacement;
   private final boolean all;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -60,7 +59,6 @@
                               String replacement,
                               boolean all) {
     super(in);
-    this.p=p;
     this.replacement = (null == replacement) ? "" : replacement;
     this.all=all;
     this.m = p.matcher(termAtt);
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternCaptureGroupTokenFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternCaptureGroupTokenFilter.java	(revision 0)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternCaptureGroupTokenFilter.java	(working copy)
@@ -0,0 +1,202 @@
+package org.apache.lucene.analysis.pattern;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.CharsRef;
+
+/**
+ * Emits one token for each capture group matched by one or more Java regex
+ * patterns.
+ *
+ * <p>
+ * For example, a pattern like:
+ * </p>
+ *
+ * <p>
+ * <code>"(https?://([a-zA-Z\-_0-9.]+))"</code>
+ * </p>
+ *
+ * <p>
+ * when matched against the string "http://www.foo.com/index" would return the
+ * tokens "http://www.foo.com" and "www.foo.com".
+ * </p>
+ * <p>
+ * If none of the patterns match, or if preserveOriginal is true, the original token is
+ * preserved. Each additional token from the same input token has position increment 0,
+ * offsets of the captured text, and otherwise the attributes of the input token.
+ * </p>
+ * <p>
+ * Each pattern is matched as often as it can be, so the pattern
+ * <code> "(...)"</code>, when matched against <code>"abcdefghi"</code> would
+ * produce <code>["abc","def","ghi"]</code>
+ * </p>
+ * <p>
+ * A camelCaseFilter could be written as:
+ * </p>
+ * <p>
+ * <code>
+ *   "([A-Z]{2,})",                                 <br />
+ *   "(?&lt;![A-Z])([A-Z][a-z]+)",                     <br />
+ *   "(?:^|\\b|(?&lt;=[0-9_])|(?&lt;=[A-Z]{2}))([a-z]+)", <br />
+ *   "([0-9]+)"
+ * </code>
+ * </p>
+ * <p>
+ * plus if preserveOriginal is true, it would also return
+ * <code>"camelCaseFilter"</code>
+ * </p>
+ */
+public final class PatternCaptureGroupTokenFilter extends TokenFilter {
+
+  private final CharTermAttribute charTermAttr = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
+  private final OffsetAttribute offsetAttr = addAttribute(OffsetAttribute.class);
+  private final Matcher[] matchers; // one reusable matcher per pattern
+  private final CharsRef spare = new CharsRef(); // copy of the current input token's text
+  private final int[] groupCounts; // number of capture groups of each pattern
+  private final boolean preserveOriginal;
+  private int[] currentGroup; // per matcher: -1 = find() pending, 0 = no more matches, n = next group to emit
+  private int charOffsetStart; // start offset of the current input token
+  private int currentMatcher; // matcher chosen by nextCapture(); -1 = nothing pending for the current token
+  private State state; // attributes of the current input token, restored for every extra capture token
+
+  /**
+   * Creates a filter that emits the capture groups matched by <code>patterns</code>.
+   * @param input the input {@link TokenStream}
+   * @param preserveOriginal
+   *          set to true to return the original token even if one of the
+   *          patterns matches
+   * @param patterns an array of {@link Pattern} objects to match against each token
+   */
+  public PatternCaptureGroupTokenFilter(TokenStream input,
+      boolean preserveOriginal, Pattern... patterns) {
+    super(input);
+    this.preserveOriginal = preserveOriginal;
+    this.matchers = new Matcher[patterns.length];
+    this.groupCounts = new int[patterns.length];
+    this.currentGroup = new int[patterns.length];
+    for (int i = 0; i < patterns.length; i++) {
+      this.matchers[i] = patterns[i].matcher("");
+      this.groupCounts[i] = this.matchers[i].groupCount();
+      this.currentGroup[i] = -1;
+    }
+  }
+
+  /**
+   * Selects the matcher whose next pending capture group starts earliest (ties go
+   * to the lower index) and stores it in currentMatcher; returns false if none is left.
+   */
+  private boolean nextCapture() {
+    int min_offset = Integer.MAX_VALUE;
+    currentMatcher = -1;
+    for (int i = 0; i < matchers.length; i++) {
+      final Matcher matcher = matchers[i];
+      if (currentGroup[i] == -1) {
+        currentGroup[i] = matcher.find() ? 1 : 0;
+      }
+      if (currentGroup[i] != 0) {
+        while (currentGroup[i] < groupCounts[i] + 1) {
+          final int start = matcher.start(currentGroup[i]);
+          final int end = matcher.end(currentGroup[i]);
+          // skip empty groups and, with preserveOriginal, a group that spans the whole token
+          if (start == end || (preserveOriginal && start == 0 && spare.length == end)) {
+            currentGroup[i]++;
+            continue;
+          }
+          if (start < min_offset) {
+            min_offset = start;
+            currentMatcher = i;
+          }
+          break;
+        }
+        if (currentGroup[i] == groupCounts[i] + 1) {
+          currentGroup[i] = -1;
+          i--; // every group of this match is consumed: revisit the same matcher for its next find()
+        }
+      }
+    }
+    return currentMatcher != -1;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    // Emit the pending capture groups of the current input token first.
+    if (currentMatcher != -1 && nextCapture()) {
+      assert state != null;
+      clearAttributes();
+      // Start from the original token's attributes so that values set upstream
+      // (type, payload, flags, keyword, ...) are not lost on the capture tokens.
+      restoreState(state);
+      final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+      final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+      posAttr.setPositionIncrement(0);
+      charTermAttr.copyBuffer(spare.chars, start, end - start);
+      offsetAttr.setOffset(charOffsetStart + start, charOffsetStart + end);
+      currentGroup[currentMatcher]++;
+      return true;
+    }
+
+    if (!input.incrementToken()) {
+      return false;
+    }
+
+    // Snapshot the term text and every attribute before this token is modified.
+    spare.copyChars(charTermAttr.buffer(), 0, charTermAttr.length());
+    charOffsetStart = offsetAttr.startOffset();
+    state = captureState();
+
+    for (int i = 0; i < matchers.length; i++) {
+      matchers[i].reset(spare);
+      currentGroup[i] = -1;
+    }
+
+    if (preserveOriginal) {
+      // Emit the token untouched; any value != -1 makes the next call try nextCapture().
+      currentMatcher = 0;
+    } else if (nextCapture()) {
+      final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+      final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+
+      // if we start at 0 we can simply set the length and save the copy
+      if (start == 0) {
+        charTermAttr.setLength(end);
+      } else {
+        charTermAttr.copyBuffer(spare.chars, start, end - start);
+      }
+      offsetAttr.setOffset(charOffsetStart + start, charOffsetStart + end);
+      currentGroup[currentMatcher]++;
+    }
+    return true;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    currentMatcher = -1;
+    state = null;
+  }
+
+}
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory	(revision 1444706)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory	(working copy)
@@ -69,6 +69,7 @@
 org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory
 org.apache.lucene.analysis.no.NorwegianMinimalStemFilterFactory
 org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory
+org.apache.lucene.analysis.pattern.PatternCaptureGroupFilterFactory
 org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory
 org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterFactory
 org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1444706)
+++ lucene/CHANGES.txt	(working copy)
@@ -155,6 +155,12 @@
   to Lucene42DocValuesConsumer) if you want to make this tradeoff.
   (Adrien Grand, Robert Muir)
 
+* LUCENE-4766: Added a Pattern TokenFilter which emits a token for every
+  capturing group. This filter accepts multiple patterns and emits tokens for
+  every capturing group that is matched in any pattern. Patterns are not
+  anchored to the beginning and end of the string, so each pattern can produce
+  multiple matches. (Clinton Gormley, Simon Willnauer)
+
 API Changes
 
 * LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
