Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(revision 1460550)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(working copy)
@@ -71,8 +71,10 @@
 import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
 import org.apache.lucene.analysis.miscellaneous.LengthFilter;
 import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
 import org.apache.lucene.analysis.miscellaneous.TrimFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
@@ -578,6 +580,29 @@
         return map;
       }
     });
+    put(StemmerOverrideMap.class, new ArgProducer() {
+      @Override public Object create(Random random) {
+        int num = random.nextInt(10);
+        StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
+        for (int i = 0; i < num; i++) {
+          String input = ""; 
+          do {
+            input = _TestUtil.randomRealisticUnicodeString(random);
+          } while(input.isEmpty());
+          String out = ""; _TestUtil.randomSimpleString(random);
+          do {
+            out = _TestUtil.randomRealisticUnicodeString(random);
+          } while(out.isEmpty());
+          builder.add(input, out);
+        }
+        try {
+          return builder.build();
+        } catch (Exception ex) {
+          Rethrow.rethrow(ex);
+          return null; // unreachable code
+        }
+      }
+    });
     put(SynonymMap.class, new ArgProducer() {
       @Override public Object create(Random random) {
         SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java	(revision 1460550)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java	(working copy)
@@ -1,15 +1,4 @@
 package org.apache.lucene.analysis.miscellaneous;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -26,17 +15,124 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * 
+ */
 public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
   public void testOverride() throws IOException {
     // lets make booked stem to books
     // the override filter will convert "booked" to "books",
     // but also mark it with KeywordAttribute so Porter will not change it.
-    CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
-    dictionary.put("booked", "books");
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder();
+    builder.add("booked", "books");
     Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
-    TokenStream stream = new PorterStemFilter(
-        new StemmerOverrideFilter(tokenizer, dictionary));
-    assertTokenStreamContents(stream, new String[] { "books" });
+    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+        tokenizer, builder.build()));
+    assertTokenStreamContents(stream, new String[] {"books"});
   }
+  
+  public void testIgnoreCase() throws IOException {
+    // lets make booked stem to books
+    // the override filter will convert "booked" to "books",
+    // but also mark it with KeywordAttribute so Porter will not change it.
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
+    builder.add("boOkEd", "books");
+    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("BooKeD"));
+    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+        tokenizer, builder.build()));
+    assertTokenStreamContents(stream, new String[] {"books"});
+  }
+  
+  public void testRandomRealisticWhiteSpace() throws IOException {
+    Map<String,String> map = new HashMap<String,String>();
+    int numTerms = atLeast(50);
+    for (int i = 0; i < numTerms; i++) {
+      String randomRealisticUnicodeString = _TestUtil
+          .randomRealisticUnicodeString(random());
+      char[] charArray = randomRealisticUnicodeString.toCharArray();
+      StringBuilder builder = new StringBuilder();
+      for (int j = 0; j < charArray.length;) {
+        int cp = Character.codePointAt(charArray, j);
+        if (!Character.isWhitespace(cp)) {
+          builder.appendCodePoint(cp);
+        }
+        j += Character.charCount(cp);
+      }
+      if (builder.length() > 0) {
+        String value = _TestUtil.randomSimpleString(random());
+        map.put(builder.toString(),
+            value.isEmpty() ? "a" : value);
+        
+      }
+    }
+    if (map.isEmpty()) {
+      map.put("booked", "books");
+    }
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
+    Set<Entry<String,String>> entrySet = map.entrySet();
+    StringBuilder input = new StringBuilder();
+    List<String> output = new ArrayList<String>();
+    for (Entry<String,String> entry : entrySet) {
+      builder.add(entry.getKey(), entry.getValue());
+      if (random().nextBoolean() || output.isEmpty()) {
+        input.append(entry.getKey()).append(" ");
+        output.add(entry.getValue());
+      }
+    }
+    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+        new StringReader(input.toString()));
+    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+        tokenizer, builder.build()));
+    assertTokenStreamContents(stream, output.toArray(new String[0]));
+  }
+  
+  public void testRandomRealisticKeyword() throws IOException {
+    Map<String,String> map = new HashMap<String,String>();
+    int numTerms = atLeast(50);
+    for (int i = 0; i < numTerms; i++) {
+      String randomRealisticUnicodeString = _TestUtil
+          .randomRealisticUnicodeString(random());
+      if (randomRealisticUnicodeString.length() > 0) {
+        String value = _TestUtil.randomSimpleString(random());
+        map.put(randomRealisticUnicodeString,
+            value.isEmpty() ? "a" : value);
+      }
+    }
+    if (map.isEmpty()) {
+      map.put("booked", "books");
+    }
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
+    Set<Entry<String,String>> entrySet = map.entrySet();
+    for (Entry<String,String> entry : entrySet) {
+      builder.add(entry.getKey(), entry.getValue());
+    }
+    StemmerOverrideMap build = builder.build();
+    for (Entry<String,String> entry : entrySet) {
+      if (random().nextBoolean()) {
+        Tokenizer tokenizer = new KeywordTokenizer(new StringReader(
+            entry.getKey()));
+        TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+            tokenizer, build));
+        assertTokenStreamContents(stream, new String[] {entry.getValue()});
+      }
+    }
+  }
 }
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java	(revision 1460550)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java	(working copy)
@@ -21,6 +21,7 @@
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
@@ -30,9 +31,13 @@
 import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util.fst.FST;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -96,7 +101,7 @@
    */
   private CharArraySet excltable = CharArraySet.EMPTY_SET;
 
-  private final CharArrayMap<String> stemdict;
+  private final StemmerOverrideMap stemdict;
   private final Version matchVersion;
 
   /**
@@ -120,7 +125,24 @@
     this.matchVersion = matchVersion;
     this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
-    this.stemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
+    if (stemOverrideDict.isEmpty()) {
+      this.stemdict = null;
+    } else {
+      // we don't need to ignore case here since we lowercase in this analyzer anyway
+      StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
+      CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
+      CharsRef spare = new CharsRef();
+      while (iter.hasNext()) {
+        char[] nextKey = iter.nextKey();
+        spare.copyChars(nextKey, 0, nextKey.length);
+        builder.add(spare, iter.currentValue());
+      }
+      try {
+        this.stemdict = builder.build();
+      } catch (IOException ex) {
+        throw new RuntimeException("can not build stem dict", ex);
+      }
+    }
   }
   
   /**
@@ -141,7 +163,7 @@
     result = new StopFilter(matchVersion, result, stoptable);
     if (!excltable.isEmpty())
       result = new SetKeywordMarkerFilter(result, excltable);
-    if (!stemdict.isEmpty())
+    if (stemdict != null)
       result = new StemmerOverrideFilter(result, stemdict);
     result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
     return new TokenStreamComponents(source, result);
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java	(revision 1460550)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java	(working copy)
@@ -18,22 +18,35 @@
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArrayMap;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.Arc;
+import org.apache.lucene.util.fst.FST.BytesReader;
 
 /**
  * Provides the ability to override any {@link KeywordAttribute} aware stemmer
  * with custom dictionary-based stemming.
  */
 public final class StemmerOverrideFilter extends TokenFilter {
-  private final CharArrayMap<String> dictionary;
+  private final StemmerOverrideMap stemmerOverrideMap;
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
+  private final BytesReader fstReader;
+  private final Arc<BytesRef> scratchArc = new FST.Arc<BytesRef>();
+;
+  private final CharsRef spare = new CharsRef();
   
   /**
    * Create a new StemmerOverrideFilter, performing dictionary-based stemming
@@ -43,19 +56,24 @@
    * so that they will not be stemmed with stemmers down the chain.
    * </p>
    */
-  public StemmerOverrideFilter(TokenStream input,
-      CharArrayMap<String> dictionary) {
+  public StemmerOverrideFilter(final TokenStream input, final StemmerOverrideMap stemmerOverrideMap) {
     super(input);
-    this.dictionary = dictionary;
+    this.stemmerOverrideMap = stemmerOverrideMap;
+    fstReader = stemmerOverrideMap.getBytesReader();
   }
-
+  
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
       if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
-        String stem = dictionary.get(termAtt.buffer(), 0, termAtt.length());
+        final BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
         if (stem != null) {
-          termAtt.setEmpty().append(stem);
+          final char[] buffer = spare.chars = termAtt.buffer();
+          UnicodeUtil.UTF8toUTF16(stem.bytes, stem.offset, stem.length, spare);
+          if (spare.chars != buffer) {
+            termAtt.copyBuffer(spare.chars, spare.offset, spare.length);
+          }
+          termAtt.setLength(spare.length);
           keywordAtt.setKeyword(true);
         }
       }
@@ -64,4 +82,130 @@
       return false;
     }
   }
+  
+  /**
+   * A read-only 4-byte FST backed map that allows fast case-insensitive key
+   * value lookups for {@link StemmerOverrideFilter}
+   */
+  // TODO maybe we can generalize this and reuse this map somehow?
+  public final static class StemmerOverrideMap {
+    private final FST<BytesRef> fst;
+    private final boolean ignoreCase;
+    
+    /**
+     * Creates a new {@link StemmerOverrideMap} 
+     * @param fst the fst to lookup the overrides
+     * @param ignoreCase if the keys case should be ingored
+     */
+    StemmerOverrideMap(FST<BytesRef> fst, boolean ignoreCase) {
+      this.fst = fst;
+      this.ignoreCase = ignoreCase;
+    }
+    
+    /**
+     * Returns a {@link BytesReader} to pass to the {@link #get(char[], int, Arc, BytesReader)} method.
+     */
+    BytesReader getBytesReader() {
+      return fst.getBytesReader();
+    }
+
+    /**
+     * Returns the value mapped to the given key or <code>null</code> if the key is not in the FST dictionary.
+     */
+    final BytesRef get(char[] buffer, int bufferLen, Arc<BytesRef> scratchArc, BytesReader fstReader) throws IOException {
+      BytesRef pendingOutput = fst.outputs.getNoOutput();
+      BytesRef matchOutput = null;
+      int bufUpto = 0;
+      fst.getFirstArc(scratchArc);
+      while (bufUpto < bufferLen) {
+        final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen);
+        if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null) {
+          return null;
+        }
+        pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
+        bufUpto += Character.charCount(codePoint);
+      }
+      if (scratchArc.isFinal()) {
+        matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
+      }
+      return matchOutput;
+    }
+    
+  }
+  /**
+   * This builder builds an {@link FST} for the {@link StemmerOverrideFilter}
+   */
+  public static class Builder {
+    private final BytesRefHash hash = new BytesRefHash();
+    private final BytesRef spare = new BytesRef();
+    private final ArrayList<CharSequence> outputValues = new ArrayList<CharSequence>();
+    private final boolean ignoreCase;
+    private final CharsRef charsSpare = new CharsRef();
+    
+    /**
+     * Creates a new {@link Builder} with ignoreCase set to <code>false</code> 
+     */
+    public Builder() {
+      this(false);
+    }
+    
+    /**
+     * Creates a new {@link Builder}
+     * @param ignoreCase if the input case should be ignored.
+     */
+    public Builder(boolean ignoreCase) {
+      this.ignoreCase = ignoreCase;
+    }
+    
+    /**
+     * Adds an input string and it's stemmer override output to this builder.
+     * 
+     * @param input the input char sequence 
+     * @param output the stemmer override output char sequence
+     * @return <code>false</code> iff the input has already been added to this builder otherwise <code>true</code>.
+     */
+    public boolean add(CharSequence input, CharSequence output) {
+      final int length = input.length();
+      if (ignoreCase) {
+        // convert on the fly to lowercase
+        charsSpare.grow(length);
+        final char[] buffer = charsSpare.chars;
+        for (int i = 0; i < length; ) {
+            i += Character.toChars(
+                    Character.toLowerCase(
+                        Character.codePointAt(input, i)), buffer, i);
+        }
+        UnicodeUtil.UTF16toUTF8(buffer, 0, length, spare);
+      } else {
+        UnicodeUtil.UTF16toUTF8(input, 0, length, spare);
+      }
+      if (hash.add(spare) >= 0) {
+        outputValues.add(output);
+        return true;
+      }
+      return false;
+    }
+    
+    /**
+     * Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
+     * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
+     * @throws IOException if an {@link IOException} occurs;
+     */
+    public StemmerOverrideMap build() throws IOException {
+      ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
+      org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<BytesRef>(
+          FST.INPUT_TYPE.BYTE4, outputs);
+      final int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
+      IntsRef intsSpare = new IntsRef();
+      final int size = hash.size();
+      for (int i = 0; i < size; i++) {
+        int id = sort[i];
+        BytesRef bytesRef = hash.get(id, spare);
+        UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
+        builder.add(intsSpare, new BytesRef(outputValues.get(id)));
+      }
+      return new StemmerOverrideMap(builder.finish(), ignoreCase);
+    }
+    
+  }
 }
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java	(revision 1460550)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java	(working copy)
@@ -19,10 +19,15 @@
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Locale;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
 import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.fst.FST;
 
 /**
  * Factory for {@link StemmerOverrideFilter}.
@@ -36,7 +41,7 @@
  *
  */
 public class StemmerOverrideFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
-  private CharArrayMap<String> dictionary = null;
+  private StemmerOverrideMap dictionary;
   private boolean ignoreCase;
 
   @Override
@@ -47,15 +52,15 @@
       assureMatchVersion();
       List<String> files = splitFileNames(dictionaryFiles);
       if (files.size() > 0) {
-        dictionary = new CharArrayMap<String>(luceneMatchVersion, 
-            files.size() * 10, ignoreCase);
+        StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
         for (String file : files) {
           List<String> list = getLines(loader, file.trim());
           for (String line : list) {
             String[] mapping = line.split("\t", 2);
-            dictionary.put(mapping[0], mapping[1]);
+            builder.add(mapping[0], mapping[1]);
           }
         }
+        dictionary = builder.build();
       }
     }
   }
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1460550)
+++ lucene/CHANGES.txt	(working copy)
@@ -150,6 +150,9 @@
 * LUCENE-4571: Speed up BooleanQuerys with minNrShouldMatch to use
   skipping.  (Stefan Pohl via Robert Muir)
 
+* LUCENE-4863: StemmerOverrideFilter now uses a FST to represent its overrides
+  in memory. (Simon Willnauer)
+
 API Changes
 
 * LUCENE-4844: removed TaxonomyReader.getParent(), you should use
