Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1233916)
+++ lucene/CHANGES.txt	(working copy)
@@ -789,6 +789,9 @@
 * LUCENE-3121: Add sugar reverse lookup (given an output, find the
   input mapping to it) for FSTs that have strictly monotonic long
   outputs (such as an ord).  (Mike McCandless)
+
+* LUCENE-3714: Add top N lowest cost paths search for FST<Long>
+  (Robert Muir, Dawid Weiss, Mike McCandless)
   
 Bug fixes
 
Index: lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
===================================================================
--- lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java	(revision 1233916)
+++ lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java	(working copy)
@@ -1055,50 +1055,6 @@
     }
   }
 
-  // NOTE: this test shows a case where our current builder
-  // fails to produce minimal FST:
-  /*
-  public void test3() throws Exception {
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
-    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
-    IntsRef scratchIntsRef = new IntsRef();
-    builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.get(0));
-    builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), 1L);
-    builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), 2L);
-    final FST<Long> fst = builder.finish();
-    //System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount());
-    // NOTE: we produce 7 nodes today
-    assertEquals(6, fst.getNodeCount());
-    // NOTE: we produce 8 arcs today
-    assertEquals(7, fst.getNodeCount());
-    //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
-    //Util.toDot(fst, w, false, false);
-    //w.close();
-  }
-  */
-
-  // NOTE: this test shows a case where our current builder
-  // fails to produce minimal FST:
-  /*
-  public void test4() throws Exception {
-    final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
-    Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
-    IntsRef scratchIntsRef = new IntsRef();
-    builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.getNoOutput());
-    builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), new BytesRef("1"));
-    builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), new BytesRef("11"));
-    final FST<BytesRef> fst = builder.finish();
-    //System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount());
-    // NOTE: we produce 7 nodes today
-    assertEquals(6, fst.getNodeCount());
-    // NOTE: we produce 8 arcs today
-    assertEquals(7, fst.getNodeCount());
-    //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
-    //Util.toDot(fst, w, false, false);
-    //w.close();
-  }
-  */
-
   // Build FST for all unique terms in the test line docs
   // file, up until a time limit
   public void testRealTerms() throws Exception {
@@ -1890,4 +1846,115 @@
     assertFalse(arc.isFinal());
     assertEquals(42, arc.output.longValue());
   }
+
+  public void testMinPairs() throws Exception {
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+
+    final IntsRef scratch = new IntsRef();
+    builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), outputs.get(22));
+    builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), outputs.get(7));
+    builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), outputs.get(17));
+    final FST<Long> fst = builder.finish();
+    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
+    Util.toDot(fst, w, false, false);
+    w.close();
+
+    Util.MinResult[] r = Util.findMinPairs(fst,
+                                           fst.getFirstArc(new FST.Arc<Long>()),
+                                           3);
+    assertEquals(3, r.length);
+
+    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), r[0].input);
+    assertEquals(7, r[0].output);
+
+    assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), r[1].input);
+    assertEquals(17, r[1].output);
+
+    assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), r[2].input);
+    assertEquals(22, r[2].output);
+  }
+  
+  public void testMinPairsRandom() throws Exception {
+    int numWords = atLeast(1000);
+    
+    final TreeMap<String,Long> slowCompletor = new TreeMap<String,Long>();
+    final TreeSet<String> allPrefixes = new TreeSet<String>();
+    
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+    final IntsRef scratch = new IntsRef();
+    
+    for (int i = 0; i < numWords; i++) {
+      String s;
+      while (true) {
+        s = _TestUtil.randomSimpleString(random);
+        if (!slowCompletor.containsKey(s)) {
+          break;
+        }
+      }
+      
+      for (int j = 1; j < s.length(); j++) {
+        allPrefixes.add(s.substring(0, j));
+      }
+      int weight = _TestUtil.nextInt(random, 1, 100); // weights 1..100
+      slowCompletor.put(s, (long)weight);
+    }
+    
+    for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
+      //System.out.println("add: " + e);
+      builder.add(Util.toIntsRef(new BytesRef(e.getKey()), scratch), outputs.get(e.getValue()));
+    }
+    
+    final FST<Long> fst = builder.finish();
+    //System.out.println("SAVE out.dot");
+    //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
+    //Util.toDot(fst, w, false, false);
+    //w.close();
+    
+    //System.out.println("testing: " + allPrefixes.size() + " prefixes");
+    for (String prefix : allPrefixes) {
+      // 1. run prefix against fst, then complete by value
+      //System.out.println("TEST: " + prefix);
+
+      long prefixOutput = 0;
+      FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
+      for(int idx=0;idx<prefix.length();idx++) {
+        if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc) == null) {
+          fail();
+        }
+        prefixOutput += arc.output;
+      }
+
+      final int topN = _TestUtil.nextInt(random, 1, 10);
+
+      Util.MinResult[] r = Util.findMinPairs(fst, arc, topN);
+
+      // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
+      final List<Util.MinResult> matches = new ArrayList<Util.MinResult>();
+
+      // TODO: could be faster... but its slowCompletor for a reason
+      for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
+        if (e.getKey().startsWith(prefix)) {
+          //System.out.println("  consider " + e.getKey());
+          matches.add(new Util.MinResult(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())), new IntsRef()),
+                                         e.getValue() - prefixOutput));
+        }
+      }
+
+      assertTrue(matches.size() > 0);
+      Collections.sort(matches);
+      if (matches.size() > topN) {
+        matches.subList(topN, matches.size()).clear();
+      }
+
+      assertEquals(matches.size(), r.length);
+
+      for(int hit=0;hit<r.length;hit++) {
+        //System.out.println("  check hit " + hit);
+        assertEquals(matches.get(hit).input, r[hit].input);
+        assertEquals(matches.get(hit).output, r[hit].output);
+      }
+    }
+  }
 }
Index: lucene/src/java/org/apache/lucene/util/fst/Util.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/fst/Util.java	(revision 1233916)
+++ lucene/src/java/org/apache/lucene/util/fst/Util.java	(working copy)
@@ -193,7 +193,304 @@
       }
     }    
   }
-  
+
+  private static class FSTPath implements Comparable<FSTPath> {
+    public FST.Arc<Long> arc;
+    public long cost;
+    public final IntsRef input = new IntsRef();
+
+    public FSTPath(long cost, FST.Arc<Long> arc) {
+      this.arc = new FST.Arc<Long>().copyFrom(arc);
+      this.cost = cost;
+    }
+
+    @Override
+    public String toString() {
+      return "input=" + input + " cost=" + cost;
+    }
+
+    @Override
+    public int compareTo(FSTPath other) {
+      if (cost < other.cost) {
+        return -1;
+      } else if (cost > other.cost) {
+        return 1;
+      } else  {
+        return input.compareTo(other.input);
+      }
+    }
+  }
+
+  private static class TopNSearcher {
+
+    private final FST<Long> fst;
+    private final FST.Arc<Long> fromNode;
+    private final int topN;
+
+    // Set once the queue has filled:
+    FSTPath bottom = null;
+
+    TreeSet<FSTPath> queue = null;
+
+    public TopNSearcher(FST<Long> fst, FST.Arc<Long> fromNode, int topN) {
+      this.fst = fst;
+      this.topN = topN;
+      this.fromNode = fromNode;
+    }
+
+    // If back plus this arc is competitive then add to queue:
+    private void addIfCompetitive(FSTPath path) {
+
+      assert queue != null;
+
+      long cost = path.cost + path.arc.output;
+      //System.out.println("  addIfCompetitive bottom=" + bottom + " queue.size()=" + queue.size());
+
+      if (bottom != null) {
+
+        if (cost > bottom.cost) {
+          // Doesn't compete
+          return;
+        } else if (cost == bottom.cost) {
+          // Tie break by alpha sort on the input:
+          path.input.grow(path.input.length+1);
+          path.input.ints[path.input.length++] = path.arc.label;
+          final int cmp = bottom.input.compareTo(path.input);
+          path.input.length--;
+          assert cmp != 0;
+          if (cmp < 0) {
+            // Doesn't compete
+            return;
+          }
+        }
+        // Competes
+      } else {
+        // Queue isn't full yet, so any path we hit competes:
+      }
+
+      final FSTPath newPath = new FSTPath(cost, path.arc);
+
+      newPath.input.grow(path.input.length+1);
+      System.arraycopy(path.input.ints, 0, newPath.input.ints, 0, path.input.length);
+      newPath.input.ints[path.input.length] = path.arc.label;
+      newPath.input.length = path.input.length+1;
+
+      //System.out.println("    add path=" + newPath);
+      queue.add(newPath);
+      if (bottom != null) {
+        final FSTPath removed = queue.pollLast();
+        assert removed == bottom;
+        bottom = queue.last();
+        //System.out.println("    now re-set bottom: " + bottom + " queue=" + queue);
+      } else if (queue.size() == topN) {
+        // Queue just filled up:
+        bottom = queue.last();
+        //System.out.println("    now set bottom: " + bottom);
+      }
+    }
+
+    public MinResult[] search() throws IOException {
+      //System.out.println("  search topN=" + topN);
+      final FST.Arc<Long> scratchArc = new FST.Arc<Long>();
+
+      final List<MinResult> results = new ArrayList<MinResult>();
+
+      final Long NO_OUTPUT = fst.outputs.getNoOutput();
+
+      // TODO: we could enable FST to sorting arcs by weight
+      // as it freezes... can easily do this on first pass
+      // (w/o requiring rewrite)
+
+      // TODO: maybe we should make an FST.INPUT_TYPE.BYTE0.5!?
+      // (nibbles)
+
+      // For each top N path:
+      while (results.size() < topN) {
+        //System.out.println("\nfind next path");
+
+        FSTPath path;
+
+        if (queue == null) {
+
+          if (results.size() != 0) {
+            // Ran out of paths
+            break;
+          }
+
+          // First pass (top path): start from original fromNode
+          if (topN > 1) {
+            queue = new TreeSet<FSTPath>();
+          }
+
+          long minArcCost = Long.MAX_VALUE;
+          FST.Arc<Long> minArc = null;
+
+          path = new FSTPath(0, fromNode);
+          fst.readFirstTargetArc(fromNode, path.arc);
+
+          // Bootstrap: find the min starting arc
+          while (true) {
+            long arcScore = path.arc.output;
+            //System.out.println("  arc=" + (char) path.arc.label + " cost=" + arcScore);
+            if (arcScore < minArcCost) {
+              minArcCost = arcScore;
+              minArc = scratchArc.copyFrom(path.arc);
+              //System.out.println("    **");
+            }
+            if (queue != null) {
+              addIfCompetitive(path);
+            }
+            if (path.arc.isLast()) {
+              break;
+            }
+            fst.readNextArc(path.arc);
+          }
+
+          assert minArc != null;
+
+          if (queue != null) {
+            // Remove top path since we are now going to
+            // pursue it:
+            path = queue.pollFirst();
+            //System.out.println("  remove init path=" + path);
+            assert path.arc.label == minArc.label;
+            if (bottom != null && queue.size() == topN-1) {
+              bottom = queue.last();
+              //System.out.println("    set init bottom: " + bottom);
+            }
+          } else {
+            path.arc.copyFrom(minArc);
+            path.input.grow(1);
+            path.input.ints[0] = minArc.label;
+            path.input.length = 1;
+            path.cost = minArc.output;
+          }
+
+        } else {
+          path = queue.pollFirst();
+          if (path == null) {
+            // There were less than topN paths available:
+            break;
+          }
+        }
+
+        if (path.arc.label == FST.END_LABEL) {
+          //System.out.println("    empty string!  cost=" + path.cost);
+          // Empty string!
+          path.input.length--;
+          results.add(new MinResult(path.input, path.cost));
+          continue;
+        }
+
+        if (results.size() == topN-1) {
+          // Last path -- don't bother w/ queue anymore:
+          queue = null;
+        }
+
+        //System.out.println("  path: " + path);
+        
+        // We take path and find its "0 output completion",
+        // ie, just keep traversing the first arc with
+        // NO_OUTPUT that we can find, since this must lead
+        // to the minimum path that completes from
+        // path.arc.
+
+        // For each input letter:
+        while (true) {
+
+          //System.out.println("\n    cycle path: " + path);
+
+          fst.readFirstTargetArc(path.arc, path.arc);
+
+          // For each arc leaving this node:
+          boolean foundZero = false;
+          while(true) {
+            //System.out.println("      arc=" + (char) path.arc.label + " cost=" + path.arc.output);
+            if (path.arc.output == NO_OUTPUT) {
+              if (queue == null) {
+                foundZero = true;
+                break;
+              } else if (!foundZero) {
+                scratchArc.copyFrom(path.arc);
+                foundZero = true;
+              } else {
+                addIfCompetitive(path);
+              }
+            } else if (queue != null) {
+              addIfCompetitive(path);
+            }
+            if (path.arc.isLast()) {
+              break;
+            }
+            fst.readNextArc(path.arc);
+          }
+
+          assert foundZero;
+
+          if (queue != null) {
+            // TODO: maybe we can save this copyFrom if we
+            // are more clever above... eg on finding the
+            // first NO_OUTPUT arc we'd switch to using
+            // scratchArc
+            path.arc.copyFrom(scratchArc);
+          }
+
+          if (path.arc.label == FST.END_LABEL) {
+            // Add final output:
+            //System.out.println("    done!: " + path);
+            results.add(new MinResult(path.input, path.cost + path.arc.output));
+            break;
+          } else {
+            path.input.grow(1+path.input.length);
+            path.input.ints[path.input.length] = path.arc.label;
+            path.input.length++;
+            path.cost += path.arc.output;
+          }
+        }
+      }
+    
+      return results.toArray(new MinResult[results.size()]);
+    }
+  }
+
+  // TODO: parameterize the FST type <T> and allow passing in a
+  // comparator; eg maybe your output is a PairOutput and
+  // one of the outputs in the pair is monotonic so you
+  // compare by that
+
+  public final static class MinResult implements Comparable<MinResult> {
+    final IntsRef input;
+    final long output;
+    public MinResult(IntsRef input, long output) {
+      this.input = input;
+      this.output = output;
+    }
+
+    @Override
+    public int compareTo(MinResult other) {
+      if (output < other.output) {
+        return -1;
+      } else if (output > other.output) {
+        return 1;
+      } else {
+        return input.compareTo(other.input);
+      }
+    }
+  }
+
+  /** Starting from node, find the top N min cost (Long
+   *  output) completions to a final node.  This finds the
+   *  top N results; if you only need the best result, use
+   *  {@link #findMinPair}.
+   *
+   *  <p>NOTE: you must share the outputs when you build the
+   *  FST (pass doShare=true to {@link
+   *  PositiveIntOutputs#getSingleton}). */
+
+  public static MinResult[] findMinPairs(FST<Long> fst, FST.Arc<Long> fromNode, int topN) throws IOException {
+    return new TopNSearcher(fst, fromNode, topN).search();
+  } 
+
   /**
    * Dumps an {@link FST} to a GraphViz's <code>dot</code> language description
    * for visualization. Example of use:
Index: lucene/src/java/org/apache/lucene/util/fst/Builder.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/fst/Builder.java	(revision 1233916)
+++ lucene/src/java/org/apache/lucene/util/fst/Builder.java	(working copy)
@@ -25,7 +25,7 @@
 import java.io.IOException;
 
 /**
- * Builds a compact FST (maps an IntsRef term to an arbitrary
+ * Builds a minimal FST (maps an IntsRef term to an arbitrary
  * output) from pre-sorted terms with outputs (the FST
  * becomes an FSA if you use NoOutputs).  The FST is written
  * on-the-fly into a compact serialized format byte array, which can
@@ -35,12 +35,6 @@
  * <p>NOTE: The algorithm is described at
  * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
  *
- * If your outputs are ByteSequenceOutput then the final FST
- * will be minimal, but if you use PositiveIntOutput then
- * it's only "near minimal".  For example, aa/0, aab/1, bbb/2
- * will produce 6 states when a 5 state fst is also
- * possible.
- *
  * The parameterized type T is the output type.  See the
  * subclasses of {@link Outputs}.
  *
