Index: core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java
===================================================================
--- core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java	(revision 1508841)
+++ core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java	(working copy)
@@ -66,7 +66,8 @@
   final TempPostingsReaderBase postingsReader;
   IndexInput indexIn = null;
   IndexInput blockIn = null;
-  //static final boolean TEST = false;
+  //static final boolean TEST = true;
+  static final boolean TEST = false;
 
   public TempFSTOrdTermsReader(SegmentReadState state, TempPostingsReaderBase postingsReader) throws IOException {
     final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
@@ -166,6 +167,8 @@
 
     final int numSkipInfo;
     final long[] skipInfo;
+    final byte[] suffixInfo;
+
     final byte[] statsBlock;
     final byte[] metaLongsBlock;
     final byte[] metaBytesBlock;
@@ -183,9 +186,11 @@
       final int numBlocks = (int)(numTerms + INTERVAL - 1) / INTERVAL;
       this.numSkipInfo = longsSize + 3;
       this.skipInfo = new long[numBlocks * numSkipInfo];
+      this.suffixInfo = new byte[(int)blockIn.readVLong() + 1];
       this.statsBlock = new byte[(int)blockIn.readVLong()];
       this.metaLongsBlock = new byte[(int)blockIn.readVLong()];
       this.metaBytesBlock = new byte[(int)blockIn.readVLong()];
+      assert suffixInfo.length == numTerms + 1;
 
       int last = 0, next = 0;
       for (int i = 1; i < numBlocks; i++) {
@@ -195,6 +200,7 @@
         }
         last = next;
       }
+      blockIn.readBytes(suffixInfo, 0, suffixInfo.length - 1);
       blockIn.readBytes(statsBlock, 0, statsBlock.length);
       blockIn.readBytes(metaLongsBlock, 0, metaLongsBlock.length);
       blockIn.readBytes(metaBytesBlock, 0, metaBytesBlock.length);
@@ -502,6 +508,17 @@
       /* True when there is pending term when calling next() */
       boolean pending;
 
+      /* the min ord we need to meet, to accept the last suffix */
+      // nocommit: we can prune only when we find nextOrd <= minOrd, 
+      // when this happens we'll not expand current arc, instead, we'll
+      // load next frame
+      //
+      //
+      long minOrd;
+
+      /* the max ord we have met so far */
+      long maxOrd;
+
       /* stack to record how current term is constructed, 
        * used to accumulate metadata or rewind term:
        *   level == term.length + 1,
@@ -516,30 +533,46 @@
 
       /* query automaton to intersect with */
       final ByteRunAutomaton fsa;
+      BytesRef fsaSuffix;
+      final byte fsaEnd;
 
       private final class Frame {
         /* fst stats */
         FST.Arc<Long> arc;
+        FST.Arc<Long> nextArc;
 
         /* automaton stats */
         int state;
+        int nextState;
 
         Frame() {
           this.arc = new FST.Arc<Long>();
+          this.nextArc = new FST.Arc<Long>();
           this.state = -1;
+          this.nextState = -1;
         }
 
         public String toString() {
-          return "arc=" + arc + " state=" + state;
+          return "[arc=" + arc + " state=" + state + "] [nextArc=" + nextArc + " nextState=" + nextState + "]";
         }
       }
 
       IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
-        //if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
         this.fst = index;
         this.fstReader = fst.getBytesReader();
-        this.fstOutputs = index.outputs;
+        this.fstOutputs = fst.outputs;
         this.fsa = compiled.runAutomaton;
+        this.fsaSuffix = compiled.commonSuffixRef;
+        if (fsaSuffix != null && fsaSuffix.length > 0) {
+          this.fsaEnd = fsaSuffix.bytes[fsaSuffix.length-1];
+          suffixInfo[suffixInfo.length-1] = fsaEnd;
+        } else {
+          this.fsaEnd = (byte)0xff;
+          this.fsaSuffix = null;
+        }
+        this.minOrd = 0;
+        this.maxOrd = 0;
+        if (TEST) System.out.println("Enum init, startTerm=" + startTerm + " end=" + (char)fsaEnd);
         /*
         PrintWriter pw1 = new PrintWriter(new File("../temp/fst.txt"));
         Util.toDot(dict,pw1, false, false);
@@ -563,11 +596,16 @@
         this.decoded = false;
         this.pending = false;
 
+        if (isAccept(topFrame())) {
+          minOrd++;
+        }
         if (startTerm == null) {
           pending = isAccept(topFrame());
+          seekSuffix();
         } else {
           doSeekCeil(startTerm);
           pending = !startTerm.equals(term) && isValid(topFrame()) && isAccept(topFrame());
+          seekSuffix();
         }
       }
 
@@ -581,9 +619,9 @@
 
       @Override
       void decodeStats() throws IOException {
-        final FST.Arc<Long> arc = topFrame().arc;
-        assert arc.nextFinalOutput == fstOutputs.getNoOutput();
-        ord = arc.output;
+        assert topFrame().arc.nextFinalOutput == fstOutputs.getNoOutput();
+        ord = topFrame().arc.output;
+        if (TEST) System.out.println(" return term=" + term + " ord=" + ord);
         super.decodeStats();
       }
 
@@ -602,7 +640,7 @@
 
       @Override
       public BytesRef next() throws IOException {
-        //if (TEST) System.out.println("Enum next()");
+        if (TEST) System.out.println("Enum next()");
         if (pending) {
           pending = false;
           decodeStats();
@@ -637,7 +675,7 @@
       }
 
       BytesRef doSeekCeil(BytesRef target) throws IOException {
-        //if (TEST) System.out.println("Enum doSeekCeil()");
+        if (TEST) System.out.println("Enum doSeekCeil()");
         Frame frame= null;
         int label, upto = 0, limit = target.length;
         while (upto < limit) {  // to target prefix, or ceil label (rewind prefix)
@@ -681,62 +719,121 @@
 
       /** Load frame for start arc(node) on fst */
       Frame loadFirstFrame(Frame frame) throws IOException {
-        frame.arc = fst.getFirstArc(frame.arc);
-        frame.state = fsa.getInitialState();
+        frame.nextArc = fst.getFirstArc(frame.arc);
+        frame.nextState = fsa.getInitialState();
+        frame.arc.copyFrom(frame.nextArc);
+        frame.state = frame.nextState;
         return frame;
       }
 
-      // nocommit: expected to use readFirstTargetArc here?
-
       /** Load frame for target arc(node) on fst */
       Frame loadExpandFrame(Frame top, Frame frame) throws IOException {
+        if (TEST) System.out.println(" loadExpand ");
         if (!canGrow(top)) {
+          if (TEST) System.out.println("can't grow");
           return null;
         }
         frame.arc = fst.readFirstRealTargetArc(top.arc.target, frame.arc, fstReader);
         frame.state = fsa.step(top.state, frame.arc.label);
-        //if (TEST) System.out.println(" loadExpand frame="+frame);
+
+        rotateFrame(top, frame);  // only rotate;
+
         if (frame.state == -1) {
           return loadNextFrame(top, frame);
         }
         return frame;
       }
 
-      /** Load frame for sibling arc(node) on fst */
-      Frame loadNextFrame(Frame top, Frame frame) throws IOException {
-        if (!canRewind(frame)) {
-          return null;
-        }
-        while (!frame.arc.isLast()) {
-          frame.arc = fst.readNextRealArc(frame.arc, fstReader);
-          frame.state = fsa.step(top.state, frame.arc.label);
-          if (frame.state != -1) {
-            break;
-          }
-        }
-        //if (TEST) System.out.println(" loadNext frame="+frame);
-        if (frame.state == -1) {
-          return null;
-        }
-        return frame;
-      }
-
       /** Load frame for target arc(node) on fst, so that 
        *  arc.label >= label and !fsa.reject(arc.label) */
       Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
+        if (TEST) System.out.println(" loadCeil");
         FST.Arc<Long> arc = frame.arc;
         arc = Util.readCeilArc(label, fst, top.arc, arc, fstReader);
         if (arc == null) {
+          if (TEST) System.out.println("can't find");
           return null;
         }
         frame.state = fsa.step(top.state, arc.label);
-        //if (TEST) System.out.println(" loadCeil frame="+frame);
+
+        rotateFrame(top, frame);  // rotate frame
+
         if (frame.state == -1) {
           return loadNextFrame(top, frame);
         }
         return frame;
       }
 
+      /* Look ahead: precompute the next matching arc and automaton state */
+      void rotateFrame(Frame top, Frame frame) throws IOException {
+        frame.nextArc.copyFrom(frame.arc);
+        frame.nextState = frame.state;
+        boolean found = false;
+        while (!frame.nextArc.isLast()) {
+          frame.nextArc = fst.readNextRealArc(frame.nextArc, fstReader);
+          frame.nextState = fsa.step(top.state, frame.nextArc.label);
+          if (frame.nextState != -1) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          return;
+        }
+        ord = frame.arc.output + top.arc.output;
+        maxOrd = ord > maxOrd ? ord : maxOrd;
+        if (maxOrd > minOrd) {
+          minOrd = maxOrd;
+          seekSuffix();
+        }
+        ord = frame.nextArc.output + top.arc.output;
+        if (ord > maxOrd && ord <= minOrd) {
+          if (TEST) System.out.println(" prune: next="+ord +" min="+minOrd);
+          frame.state = -1;
+        }
+      }
+      
+
+      /** Load frame for sibling arc(node) on fst */
+      Frame loadNextFrame(Frame top, Frame frame) throws IOException {
+        if (TEST) System.out.print(" loadNext ");
+        if (!canRewind(frame)) {
+          if (TEST) System.out.println("can't rewind");
+          return null;
+        }
+        if (frame.nextState == -1) {
+          return null;
+        }
+        frame.arc.copyFrom(frame.nextArc);
+        frame.state = frame.nextState;
+
+        boolean found = false;
+        while (!frame.nextArc.isLast()) {
+          frame.nextArc = fst.readNextRealArc(frame.nextArc, fstReader);
+          frame.nextState = fsa.step(top.state, frame.nextArc.label);
+          if (frame.nextState != -1) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          return frame;
+        }
+        ord = frame.arc.output + top.arc.output;
+        maxOrd = ord > maxOrd ? ord : maxOrd;
+        if (maxOrd > minOrd) {
+          minOrd = maxOrd;
+          seekSuffix();
+        }
+        ord = frame.nextArc.output + top.arc.output;
+        if (ord > maxOrd && ord <= minOrd) {
+          if (TEST) System.out.println(" prune: next="+ord +" min="+minOrd);
+          return loadNextFrame(top, frame);
+        }
+        if (TEST) System.out.println("frame="+frame);
+        return frame;
+      }
+
       boolean isAccept(Frame frame) {  // reach a term both fst&fsa accepts
         return fsa.isAccept(frame.state) && frame.arc.isFinal();
       }
@@ -750,17 +847,30 @@
         return !frame.arc.isLast();
       }
 
+      void seekSuffix() {
+        if (fsaSuffix != null) {
+          int upto = (int)minOrd;
+          while (suffixInfo[upto] != fsaEnd) {
+            upto++;
+          }
+          minOrd = upto;
+          if (TEST) System.out.println(" seekSuffix() upto="+upto);
+        }
+      }
+
       // nocommit: need to load ord lazily?
       void pushFrame(Frame frame) {
         final FST.Arc<Long> arc = frame.arc;
         arc.output = fstOutputs.add(topFrame().arc.output, arc.output);
         term = grow(arc.label);
         level++;
+        if (TEST) System.out.println(" term=" + term);
         assert frame == stack[level];
       }
 
       Frame popFrame() {
         term = shrink();
+        if (TEST) System.out.println(" term=" + term);
         return stack[level--];
       }
 
Index: core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java
===================================================================
--- core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java	(revision 1508841)
+++ core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java	(working copy)
@@ -107,14 +107,18 @@
         blockOut.writeVLong(field.sumDocFreq);
         blockOut.writeVInt(field.docCount);
         blockOut.writeVInt(field.longsSize);
+
+        blockOut.writeVLong(field.suffixOut.getFilePointer());
         blockOut.writeVLong(field.statsOut.getFilePointer());
         blockOut.writeVLong(field.metaLongsOut.getFilePointer());
         blockOut.writeVLong(field.metaBytesOut.getFilePointer());
 
         field.skipOut.writeTo(blockOut);
+        field.suffixOut.writeTo(blockOut);
         field.statsOut.writeTo(blockOut);
         field.metaLongsOut.writeTo(blockOut);
         field.metaBytesOut.writeTo(blockOut);
+
         field.dict.save(indexOut);
       }
       writeTrailer(indexOut, indexDirStart);
@@ -146,8 +150,14 @@
 
     // nocommit: block encode each part 
     // (so that we'll have metaLongsOut[])
+    //
+    // nocommit: suffixBlock surely eats disk space, 
+    // so maybe we can set a [minByte,maxByte] 
+    // for each block, consuming only 2 bytes 
+    // instead of INTERVAL bytes.
     public RAMOutputStream skipOut;       // vint encode next skip point (all values start from 0, fully decoded when reading)
     public RAMOutputStream statsOut;      // vint encode df, (ttf-df)
+    public RAMOutputStream suffixOut;     // one byte for each term, for wildcard optimization
     public RAMOutputStream metaLongsOut;  // vint encode monotonic long[] and length for corresponding byte[]
     public RAMOutputStream metaBytesOut;  // put all bytes blob here
   }
@@ -161,6 +171,7 @@
 
     private final IntsRef scratchTerm = new IntsRef();
     private final RAMOutputStream statsOut = new RAMOutputStream();
+    private final RAMOutputStream suffixOut = new RAMOutputStream();
     private final RAMOutputStream metaLongsOut = new RAMOutputStream();
     private final RAMOutputStream metaBytesOut = new RAMOutputStream();
 
@@ -208,6 +219,7 @@
       // write term meta data into fst
       final long longs[] = new long[longsSize];
       final long delta = stats.totalTermFreq - stats.docFreq;
+      final int length = text.length;
       if (stats.totalTermFreq > 0) {
         if (delta == 0) {
           statsOut.writeVInt(stats.docFreq<<1|1);
@@ -218,6 +230,12 @@
       } else {
         statsOut.writeVInt(stats.docFreq);
       }
+      if (length > 0) {
+        suffixOut.writeByte(text.bytes[text.offset + length - 1]);
+      } else {
+        suffixOut.writeByte((byte)0);
+        assert numTerms == 0;
+      }
       postingsWriter.finishTerm(longs, metaBytesOut, stats);
       for (int i = 0; i < longsSize; i++) {
         metaLongsOut.writeVLong(longs[i] - lastLongs[i]);
@@ -243,6 +261,7 @@
         metadata.longsSize = longsSize;
         metadata.skipOut = skipOut;
         metadata.statsOut = statsOut;
+        metadata.suffixOut = suffixOut;
         metadata.metaLongsOut = metaLongsOut;
         metadata.metaBytesOut = metaBytesOut;
         metadata.dict = builder.finish();
