Index: src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
===================================================================
--- src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java	(revision 0)
+++ src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java	(revision 0)
@@ -0,0 +1,141 @@
+package org.apache.lucene.util.automaton;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.MinimizationOperations;
+import org.apache.lucene.util.automaton.SpecialOperations;
+
+public class TestLevenshteinAutomata extends LuceneTestCase {
+
+  /**
+   * Some simple tests for Distance 1.
+   * We exercise all possible 3-profiles.
+   */
+  public void testLev1() throws Exception {
+    assertLev1("otter");
+    assertLev1("atlas");
+    assertLev1("aaabbb");
+    assertLev1("aaabb");
+    assertLev1("aaab");
+    assertLev1("aabb");
+    assertLev1("abab");
+    assertLev1("b");
+    assertLev1("");
+  }
+  
+  /**
+   * Test that the DFA generated for degree 1 matches one generated with the naive algorithm.
+   */
+  private void assertLev1(String s) {
+    Automaton a1 = Lev1AutomatonSlow(s);
+    Automaton a2 = LevAutomatonFast(s, 1);
+    // Automaton .equals() means they accept the same language.
+    assertEquals(a1, a2);
+  }
+  
+  /**
+   * Return an automaton that accepts all strings within an edit distance of n from s.
+   */
+  private Automaton LevAutomatonFast(String s, int n) {
+    LevenshteinAutomata a = new LevenshteinAutomata(s);
+    return a.toAutomaton(n);
+  }
+  
+  /**
+   * Return an automaton that accepts all 1-character insertions, deletions, and
+   * substitutions of s.
+   */
+  private Automaton Lev1AutomatonSlow(String s) {
+    Automaton a = BasicAutomata.makeString(s);
+    a = BasicOperations.union(a, insertionsOf(s));
+    MinimizationOperations.minimize(a);
+    a = BasicOperations.union(a, deletionsOf(s));
+    MinimizationOperations.minimize(a);
+    a = BasicOperations.union(a, substitutionsOf(s));
+    MinimizationOperations.minimize(a);
+    
+    return a;
+  }
+  
+  /**
+   * Return an automaton that accepts all strings obtained by inserting
+   * exactly one character into s.
+   */
+  private Automaton insertionsOf(String s) {
+    List<Automaton> list = new ArrayList<Automaton>();
+    
+    for (int i = 0; i <= s.length(); i++) {
+      Automaton a = BasicAutomata.makeString(s.substring(0, i));
+      a = BasicOperations.concatenate(a, BasicAutomata.makeAnyChar());
+      a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
+          .substring(i)));
+      list.add(a);
+    }
+    
+    Automaton a = BasicOperations.union(list);
+    MinimizationOperations.minimize(a);
+    return a;
+  }
+  
+  /**
+   * Return an automaton that accepts all strings obtained by deleting
+   * exactly one character from s.
+   */
+  private Automaton deletionsOf(String s) {
+    List<Automaton> list = new ArrayList<Automaton>();
+    
+    for (int i = 0; i < s.length(); i++) {
+      Automaton a = BasicAutomata.makeString(s.substring(0, i));
+      a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
+          .substring(i + 1)));
+      a.expandSingleton();
+      list.add(a);
+    }
+    
+    Automaton a = BasicOperations.union(list);
+    MinimizationOperations.minimize(a);
+    return a;
+  }
+  
+  /**
+   * Return an automaton that accepts all strings obtained by substituting
+   * exactly one character of s.
+   */
+  private Automaton substitutionsOf(String s) {
+    List<Automaton> list = new ArrayList<Automaton>();
+    
+    for (int i = 0; i < s.length(); i++) {
+      Automaton a = BasicAutomata.makeString(s.substring(0, i));
+      a = BasicOperations.concatenate(a, BasicAutomata.makeAnyChar());
+      a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
+          .substring(i + 1)));
+      list.add(a);
+    }
+    
+    Automaton a = BasicOperations.union(list);
+    MinimizationOperations.minimize(a);
+    return a;
+  }
+}

Property changes on: src\test\org\apache\lucene\util\automaton\TestLevenshteinAutomata.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: src/java/org/apache/lucene/search/FuzzyTermsEnum.java
===================================================================
--- src/java/org/apache/lucene/search/FuzzyTermsEnum.java	(revision 908826)
+++ src/java/org/apache/lucene/search/FuzzyTermsEnum.java	(working copy)
@@ -17,22 +17,228 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.BytesRef.Comparator;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.apache.lucene.util.automaton.RunAutomaton;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
-/** Subclass of FilteredTermEnum for enumerating all terms that are similar
+/** Subclass of TermsEnum for enumerating all terms that are similar
  * to the specified filter term.
  *
  * <p>Term enumerations are always ordered by
  * {@link #getTermComparator}.  Each term in the enumeration is
  * greater than all that precede it.</p>
  */
-public final class FuzzyTermsEnum extends FilteredTermsEnum {
+public final class FuzzyTermsEnum extends TermsEnum {
+  private TermsEnum actualEnum;
+  private MultiTermQuery.BoostAttribute actualBoostAtt;
+  
+  private final MultiTermQuery.BoostAttribute boostAtt =
+    attributes().addAttribute(MultiTermQuery.BoostAttribute.class);
+  // nocommit bad variable naming, its the max nonLowerBound... 
+  private float lowerBound = boostAtt.getMaxNonCompetitiveBoost();
+  
+  private final float minSimilarity;
+  private final float scale_factor;
+  
+  private final int termLength;
+  
+  private int maxEdits;
+  private List<Automaton> automata;
+  private List<RunAutomaton> runAutomata;
+  
+  private final IndexReader reader;
+  private final Term term;
+  private final int prefixLength;
+  
+  public FuzzyTermsEnum(IndexReader reader, Term term, 
+      final float minSimilarity, final int prefixLength) throws IOException {
+    this.reader = reader;
+    this.term = term;
+    this.prefixLength = prefixLength;
+    this.minSimilarity = minSimilarity;
+    this.scale_factor = 1.0f / (1.0f - minSimilarity);
+    this.termLength = term.text().length();
+    // calculate the maximum k edits for this similarity, and build automata for 0..n, where n<=k
+    maxEdits = initialMaxDistance(minSimilarity, termLength);
+    LevenshteinAutomata la = new LevenshteinAutomata(term.text());
+    automata = new ArrayList<Automaton>(maxEdits);
+    runAutomata = new ArrayList<RunAutomaton>(maxEdits);
+    for (int i = 0; i <= maxEdits; i++) {
+      Automaton a = la.toAutomaton(i);
+      if (a == null)
+        break;
+      automata.add(a);
+      runAutomata.add(new RunAutomaton(a));
+    }
+    TermsEnum subEnum = getActualEnum(maxEdits);
+    setEnum(subEnum != null ? subEnum : 
+      new LinearFuzzyTermsEnum(reader, term, minSimilarity, prefixLength));
+  }
+  
+  private TermsEnum getActualEnum(int editDistance) throws IOException {
+    if (editDistance < automata.size()) {
+      return new AutomatonFuzzyTermsEnum(automata.get(editDistance), term, reader, 
+          minSimilarity, prefixLength, 
+          runAutomata.subList(0, editDistance + 1).toArray(new RunAutomaton[0]));
+    } else {
+      return null;
+    }
+  }
+ 
+  void setEnum(TermsEnum actualEnum) {
+    this.actualEnum = actualEnum;
+    this.actualBoostAtt = actualEnum.attributes().addAttribute(MultiTermQuery.BoostAttribute.class);
+  }
+  
+  /** fired when the max non-competitive boost has changed.
+   *  this is the hook to swap in a smarter actualEnum
+   */
+  void minBoostChanged(float boostValue) {
+    System.err.println("pq is full, minimum boost has changed: " + boostValue);
+    // TODO, do something, and swap the enum to a more efficient one.
+  }
+   
+  // for some raw minimum similarity and input term length, what is the maximum # of edits?
+  static int initialMaxDistance(float minimumSimilarity, int termLen) {
+    return (int) ((1-minimumSimilarity) * termLen);
+  }
 
+  @Override
+  public BytesRef next() throws IOException {
+    BytesRef term = actualEnum.next();
+    boostAtt.setBoost(actualBoostAtt.getBoost());
+    final float lowerBound = boostAtt.getMaxNonCompetitiveBoost();
+    if (lowerBound != this.lowerBound) {
+      this.lowerBound = lowerBound;
+      minBoostChanged(lowerBound);
+    }
+    return term;
+  }
+  
+  // proxy all other enum calls to the actual enum
+  @Override
+  public int docFreq() {
+    return actualEnum.docFreq();
+  }
+  
+  @Override
+  public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
+    return actualEnum.docs(skipDocs, reuse);
+  }
+  
+  @Override
+  public DocsAndPositionsEnum docsAndPositions(Bits skipDocs,
+      DocsAndPositionsEnum reuse) throws IOException {
+    return actualEnum.docsAndPositions(skipDocs, reuse);
+  }
+  
+  @Override
+  public Comparator getComparator() throws IOException {
+    return actualEnum.getComparator();
+  }
+  
+  @Override
+  public long ord() throws IOException {
+    return actualEnum.ord();
+  }
+  
+  @Override
+  public SeekStatus seek(BytesRef text) throws IOException {
+    return actualEnum.seek(text);
+  }
+  
+  @Override
+  public SeekStatus seek(long ord) throws IOException {
+    return actualEnum.seek(ord);
+  }
+  
+  @Override
+  public BytesRef term() throws IOException {
+    return actualEnum.term();
+  }
+}
+
+/**
+ * Implement fuzzy enumeration with automaton.
+ * <p>
+ * This is faster than LinearFuzzyTermsEnum:
+ * enumeration is logarithmic in the number of terms (instead of linear)
+ * and comparison is linear in the length of the term (rather than quadratic).
+ */
+final class AutomatonFuzzyTermsEnum extends AutomatonTermsEnum {
+  private final RunAutomaton matchers[];
+  // used for unicode conversion from BytesRef byte[] to char[]
+  private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
+  
+  private final float minimumSimilarity;
+  private final float scale_factor;
+  
+  private final BytesRef prefixBytesRef;
+  private final int fullSearchTermLength;
+  private final BytesRef termRef;
+  
+  private final MultiTermQuery.BoostAttribute boostAtt =
+    attributes().addAttribute(MultiTermQuery.BoostAttribute.class);
+  
+  public AutomatonFuzzyTermsEnum(Automaton automaton, Term queryTerm,
+      IndexReader reader, float minSimilarity, int prefixLength, RunAutomaton matchers[]) throws IOException {
+    super(automaton, queryTerm, reader, false);
+    this.minimumSimilarity = minSimilarity;
+    this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
+    this.matchers = matchers;
+    termRef = new BytesRef(queryTerm.text());
+    // The prefix could be longer than the word.
+    // In that case we must match the entire word.
+    fullSearchTermLength = queryTerm.text().length();
+    final int realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength;
+    final String prefix = queryTerm.text().substring(0, realPrefixLength);
+    prefixBytesRef = new BytesRef(prefix);
+  }
+  
+  @Override
+  protected AcceptStatus accept(BytesRef term) {
+    if (term.startsWith(prefixBytesRef)) {
+      if (term.equals(termRef)) { // ed = 0
+        boostAtt.setBoost(1.0F);
+        return AcceptStatus.YES_AND_SEEK;
+      }
+      
+      UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16);
+      
+      for (int i = 1; i < matchers.length; i++) 
+        if (matchers[i].run(utf16.result, 0, utf16.length)) {
+          final float similarity = 1.0f - ((float)i / (float) (Math.min(utf16.length, fullSearchTermLength)));
+          if (similarity > minimumSimilarity) {
+            boostAtt.setBoost((float)((similarity - minimumSimilarity) * scale_factor));
+            return AcceptStatus.YES_AND_SEEK;
+          } else {
+            // TODO: optimize and intersect automata with length restrictions up front so this can't happen.
+            return AcceptStatus.NO_AND_SEEK;
+          }
+        }
+      
+      return AcceptStatus.NO_AND_SEEK;
+    } else {
+      return AcceptStatus.END;
+    }
+  }
+}
+
+final class LinearFuzzyTermsEnum extends FilteredTermsEnum {
+
   /* This should be somewhere around the average long word.
    * If it is longer, we waste time and space. If it is shorter, we waste a
    * little bit of time growing the array as we encounter longer words.
@@ -68,7 +274,7 @@
    * @param prefixLength Length of required common prefix. Default value is 0.
    * @throws IOException
    */
-  public FuzzyTermsEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException {
+  public LinearFuzzyTermsEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException {
     super(reader, term.field());
     
     if (minSimilarity >= 1.0f)
Index: src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java
===================================================================
--- src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java	(revision 0)
+++ src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java	(revision 0)
@@ -0,0 +1,432 @@
+package org.apache.lucene.util.automaton;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.BitSet;
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.State;
+import org.apache.lucene.util.automaton.Transition;
+
+/**
+ * Class to construct DFAs that match a word within some edit distance.
+ * <p>
+ * Implements the algorithm described in:
+ * Schulz and Mihov: Fast String Correction with Levenshtein Automata
+ * <p>
+ * @lucene.experimental
+ */
+public class LevenshteinAutomata {
+  /* input word */
+  final String input;
+  final char word[];
+  /* the automata alphabet. */
+  final char alphabet[];
+  /* X(x, V) for all x in alphabet */
+  final BitSet charvectors[];
+
+  /* the unicode ranges outside of alphabet */
+  final char rangeLower[];
+  final char rangeUpper[];
+  int numRanges = 0;
+  
+  ParametricDescription descriptions[]; 
+  
+  /**
+   * Create a new LevenshteinAutomata for some input String.
+   */
+  public LevenshteinAutomata(String input) {
+    this.input = input;
+    this.word = input.toCharArray();
+    
+    // calculate the alphabet
+    SortedSet<Character> set = new TreeSet<Character>();
+    for (int i = 0; i < word.length; i++)
+      set.add(word[i]);
+    alphabet = new char[set.size()];
+    Iterator<Character> iterator = set.iterator();
+    for (int i = 0; i < alphabet.length; i++)
+      alphabet[i] = iterator.next();
+    
+    // calculate all characteristic vectors
+    charvectors = new BitSet[Character.MAX_VALUE];
+    for (char c : set)
+      charvectors[c] = calcFullVector(c, word);
+    
+    rangeLower = new char[alphabet.length + 2];
+    rangeUpper = new char[alphabet.length + 2];
+    // calculate the unicode range intervals that exclude the alphabet
+    // these are the ranges for all unicode characters not in the alphabet
+    int lower = 0;
+    for (int i = 0; i < alphabet.length; i++) {
+      char higher = alphabet[i];
+      if (higher > lower) {
+        rangeLower[numRanges] = (char) lower;
+        rangeUpper[numRanges] = (char) (higher - 1);
+        numRanges++;
+      }
+      lower = higher + 1;
+    }
+    /* add the final endpoint */
+    if (lower <= 0xFFFF) {
+      rangeLower[numRanges] = (char) lower;
+      rangeUpper[numRanges] = '\uFFFF';
+      numRanges++;
+    }
+    
+    descriptions = new ParametricDescription[] {
+        null, /* for n=0, we do not need to go through the trouble */
+        new Lev1ParametricDescription(input.length())
+    };
+  }
+  
+  /**
+   * Compute a DFA that accepts all strings within an edit distance of <code>n</code>.
+   * <p>
+   * All automata have the following properties:
+   * <ul>
+   * <li>They are deterministic (DFA).
+   * <li>There are no transitions to dead states.
+   * <li>They are not minimal (some transitions could be combined).
+   * </ul>
+   * </p>
+   */
+  public Automaton toAutomaton(int n) {
+    if (n == 0)
+      return BasicAutomata.makeString(input);
+    
+    if (n >= descriptions.length)
+      return null;
+    
+    final int range = 2*n+1;
+    ParametricDescription description = descriptions[n];
+    // the number of states is based on the length of the word and n
+    State states[] = new State[description.size()];
+    // create all states, and mark as accept states if appropriate
+    for (int i = 0; i < states.length; i++) {
+      states[i] = new State();
+      states[i].setAccept(description.isAccept(i));
+    }
+    // create transitions from state to state
+    for (int k = 0; k < states.length; k++) {
+      final int xpos = description.getPosition(k);
+      if (xpos < 0)
+        continue;
+      final int end = xpos + Math.min(word.length - xpos, range);
+      
+      for (int x = 0; x < alphabet.length; x++) {
+        final char ch = alphabet[x];
+        // get the characteristic vector at this position wrt ch
+        final int cvec = getVector(ch, xpos, end);
+        int dest = description.transition(k, xpos, cvec);
+        if (dest >= 0)
+          states[k].addTransition(new Transition(ch, states[dest]));
+      }
+      // add transitions for all other chars in unicode
+      // by definition, their characteristic vectors are always 0,
+      // because they do not exist in the input string.
+      int dest = description.transition(k, xpos, 0); // by definition
+      if (dest >= 0)
+        for (int r = 0; r < numRanges; r++)
+          states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));      
+    }
+
+    Automaton a = new Automaton();
+    a.setInitialState(states[0]);
+    a.setDeterministic(true);
+    // we should not need to trim transitions to dead states, since we do not create any (verify)
+    //
+    //a.restoreInvariant();
+    return a;
+  }
+  
+  /**
+   * Get the characteristic vector <code>X(x, V)</code> 
+   * where V is <code>substring(pos, end)</code>
+   */
+  int getVector(char x, int pos, int end) {
+    int vector = 0;
+    for (int i = pos; i < end; i++) {
+      vector <<= 1;
+      if (word[i] == x)
+        vector |= 1;
+    }
+    return vector;
+  }
+  
+  /**
+   * Precompute all characteristic vectors <code>X(x, V)</code>
+   * for some x where V is the entire word.
+   * This makes retrieving the vectors for some k-profile sequence
+   * at any position in {@link #getVector(char, int, int)}
+   * simply a bit substring operation.
+   */
+  static BitSet calcFullVector(char x, char word[]) {
+    BitSet charvector = new BitSet(word.length);
+    for (int i = 0; i < word.length; i++)
+      charvector.set(i, word[i] == x);
+    return charvector;
+  }
+  
+  /**
+   * A ParametricDescription describes the structure of a Levenshtein DFA for some degree n.
+   * <p>
+   * There are four components of a parametric description, all parameterized on the length
+   * of the word <code>w</code>:
+   * <ol>
+   * <li>The number of states: {@link #size()}
+   * <li>The set of final states: {@link #isAccept(int)}
+   * <li>The transition function: {@link #transition(int, int, int)}
+   * <li>Minimal boundary function: {@link #getPosition(int)}
+   * </ol>
+   */
+  abstract class ParametricDescription {
+    protected int w;
+    
+    ParametricDescription(int w) {
+      this.w = w;
+    }
+    
+    /**
+     * Return the number of states needed to compute a Levenshtein DFA
+     */
+    abstract int size();
+    /**
+     * Returns true if the <code>state</code> in any Levenshtein DFA is an accept state (final state).
+     */
+    abstract boolean isAccept(int state);
+    /**
+     * Returns the position in the input word for a given <code>state</code>.
+     * This is the minimal boundary for the state.
+     */
+    abstract int getPosition(int state);
+    
+    /**
+     * Returns the state number for a transition from the given <code>state</code>,
+     * assuming <code>position</code> and characteristic vector <code>vector</code>
+     */
+    abstract int transition(int state, int position, int vector);
+  }
+  
+  // Levenshtein Automata for n=1
+  // consider everything below here automatically generated code!!!!!
+  //
+  final class Lev1ParametricDescription extends ParametricDescription {
+    Lev1ParametricDescription(int w) {
+      super(w);
+    }
+    // convenience to convert from absolute to parametric
+    final int A = 0;
+    final int B = w + 1;
+    final int C = 2*w + 2;
+    final int D = 4*w + 4;
+    final int E = 6*w + 6;
+
+    @Override
+    boolean isAccept(int state) {      
+      switch(w) {
+        case 0:
+          return state == A + w || state == B + w; 
+        case 1:
+          return state == A + w || state == A + (w-1) || state == B + w || state == C + (w-1);
+        default:
+          return state == A + w || state == A + (w-1) || state == B + w || state == C + (w-1)
+            || state == D + (w-2) || state == E + (w-2);
+      }
+    }
+    
+    @Override
+    int getPosition(int state) {
+      int translatedPosition = -1;
+      
+      if (state < B)
+        translatedPosition = state - A;
+      else if (state < C)
+        translatedPosition = state - B;
+      else if (state < D)
+        translatedPosition = state - C;
+      else if (state < E)
+        translatedPosition = state - D;
+      else
+        translatedPosition = state - E;
+      
+      if (0 <= translatedPosition && translatedPosition <= w)
+        return translatedPosition;
+      else
+        return -1;
+    }
+
+    @Override
+    int size() {
+      // Ai: 1 state for 0 to w
+      // Bi: 1 state for 0 to w
+      // Ci: 2 states for 0 to w - 1
+      // Di: 2 states for 0 to w - 2
+      // Ei: 3 states for 0 to w - 2
+      
+      // overfit a little to make computing easier
+      return (w + 1) * 9;
+    }
+
+    @Override
+    int transition(int state, int position, int vector) {
+      if (position == w)
+        return transition0(state, position, vector);
+      else if (position == w - 1)
+        return transition1(state, position, vector);
+      else if (position == w - 2)
+        return transition2(state, position, vector);
+      else
+        return transition3(state, position, vector);
+    }
+    
+    int transition0(int state, int position, int vector) {
+      if (state < B)
+        return B + state;
+      return -1;
+    }
+    
+    int transition1(int state, int position, int vector) {
+      switch(vector) {
+        case 0: /* <0> */
+          if (state < B) // Ai -> Ci
+            return C + (state - A);
+          else
+            return -1;
+        default: /* <1> */
+          if (state < B) // Ai -> Ai+1
+            return state + 1;
+          else if (state < C) // Bi -> Bi+1
+            return state + 1;
+          else if (state < D) // Ci -> Bi+1
+            return B + (state - C) + 1;
+          else
+            return -1;
+      }
+    }
+    
+    int transition2(int state, int position, int vector) {
+      switch(vector) {
+        case 0: /* <0,0> */
+          if (state < B) // Ai -> Ci
+            return C + (state - A);
+          else
+            return -1;
+        case 1: /* <0,1> */
+          if (state < B) // Ai -> Ei
+            return E + (state - A);
+          else if (state < C)
+            return -1;
+          else if (state < D) // Ci -> Bi+2
+            return B + (state - C) + 2;
+          else if (state < E)
+            return -1;
+          else // Ei -> Bi+2
+            return B + (state - E) + 2;
+        case 2: /* <1,0> */
+          if (state < C) // Ai->Ai+1,Bi->Bi+1
+            return state + 1;
+          else if (state < D) // Ci -> Bi+1
+            return B + (state - C) + 1;
+          else if (state < E) // Di -> Bi+1
+            return B + (state - D) + 1;
+          else // Ei -> Bi+1
+            return B + (state - E) + 1;
+        default: /* <1,1> */
+         if (state < D) // Ai->Ai+1,Bi->Bi+1,Ci->Ci+1
+           return state + 1;
+         else if (state < E) // Di -> Bi+1
+           return B + (state - D) + 1;
+         else // Ei -> Ci+1
+           return C + (state - E) + 1;
+      }
+    }
+    
+    int transition3(int state, int position, int vector) {
+      switch(vector) {
+        case 0: /* <0,0,0> */
+          if (state < B) // Ai -> Ci
+            return C + (state - A);
+          else
+            return -1;
+        case 1: /* <0,0,1> */
+          if (state < B) // Ai -> Ci
+            return C + (state - A);
+          else if (state < D)
+            return -1;
+          else if (state < E) // Di -> Bi+3
+            return B + (state - D) + 3;
+          else // Ei -> Bi+3
+            return B + (state - E) + 3;
+        case 2: /* <0,1,0> */
+          if (state < B) // Ai -> Ei
+            return E + (state - A);
+          else if (state < C)
+            return -1;
+          else if (state < D) // Ci -> Bi+2
+            return B + (state - C) + 2;
+          else if (state < E)
+            return -1;
+          else // Ei -> Bi+2
+            return B + (state - E) + 2;
+        case 3: /* <0,1,1> */
+          if (state < B) // Ai -> Ei
+            return E + (state - A);
+          else if (state < C)
+            return -1;
+          else if (state < D) // Ci>Bi+2
+            return B + (state - C) + 2;
+          else if (state < E) // Di -> Bi+3
+            return B + (state - D) + 3;
+          else // Ei -> Ci+2
+            return C + (state - E) + 2;
+        case 4: /* <1,0,0> */
+          if (state < C) // Ai->Ai+1,Bi->Bi+1
+            return state + 1;
+          else if (state < D) // Ci -> Bi+1
+            return B + (state - C) + 1;
+          else if (state < E) // Di -> Bi+1
+            return B + (state - D) + 1;
+          else // Ei -> Bi+1
+            return B + (state - E) + 1;
+        case 5: /* <1,0,1> */
+          if (state < C) // Ai->Ai+1,Bi->Bi+1
+            return state + 1;
+          else if (state < D) // Ci->Bi+1
+            return B + (state - C) + 1;
+          else if (state < E) // Di->Di+1
+            return state + 1;
+          else // Ei -> Di+1
+            return D + (state - E) + 1;
+        case 6: /* <1,1,0> */
+          if (state < D) // Ai->Ai+1,Bi->Bi+1,Ci->Ci+1
+            return state + 1;
+          else if (state < E) // Di -> Bi+1
+            return B + (state - D) + 1;
+          else // Ei -> Ci+1
+            return C + (state - E) + 1;
+        default: /* <1,1,1> */
+          return Math.min(size() - 1, state + 1);
+      }
+    }
+  }
+}

Property changes on: src\java\org\apache\lucene\util\automaton\LevenshteinAutomata.java
___________________________________________________________________
Added: svn:eol-style
   + native

