Index: lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java	(revision 1361471)
+++ lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java	(working copy)
@@ -46,41 +46,35 @@
   }
 
   /**
-   * Should not encode extra information other than header
+   * Should not encode extra information other than single int
    */
-  public void testPForAllZeros() throws Exception {
+  public void testAllEqual() throws Exception {
+    initRandom();
     int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
-    int ensz;
     int[] data=new int[sz];
-    byte[] res = new byte[4+sz*8];
+    byte[] res = new byte[sz*8];
     int[] copy = new int[sz];
     IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
+    int ensz;
+    int header;
 
-    Arrays.fill(data,0);
-    ensz = ForUtil.compress(data,sz,resBuffer); // test For
-    ForUtil.decompress(resBuffer,copy);
+    Arrays.fill(data,gen.nextInt());
+    header = ForUtil.compress(data,resBuffer); // test For
+    ensz = ForUtil.getEncodedSize(header);
     assert ensz == 4;
+
+    ForUtil.decompress(resBuffer,copy,header);
     assert cmp(data,sz,copy,sz)==true;
 
-    Arrays.fill(data,0);
-    ensz = PForUtil.compress(data,sz,resBuffer); // test PFor
-    PForUtil.decompress(resBuffer,copy);
+    Arrays.fill(data,gen.nextInt());
+    header = PForUtil.compress(data,resBuffer); // test PFor
+    ensz = PForUtil.getEncodedSize(header);
     assert ensz == 4;
+
+    PForUtil.decompress(resBuffer,copy,header);
     assert cmp(data,sz,copy,sz)==true;
   }
 
-  public void testForAllZeros() throws Exception {
-    int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
-    int ensz;
-    int[] data=new int[sz];
-    byte[] res = new byte[4+sz*8];
-    int[] copy = new int[sz];
-    IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
-
-    ensz = ForUtil.compress(data,sz,resBuffer);
-
-  }
-
   /**
    * Test correctness of forced exception.
    * the forced ones should exactly fit max chain 
@@ -89,7 +83,7 @@
     initRandom();
     int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
     int[] data=new int[sz];
-    byte[] res = new byte[4+sz*8];
+    byte[] res = new byte[sz*8];
     int[] copy = new int[sz];
     IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
     int numBits = gen.nextInt(5)+1;
@@ -107,11 +101,11 @@
       data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
       j++;
     }
-    ensz = PForUtil.compress(data,sz,resBuffer);
-    header = resBuffer.get(0);
+    header = PForUtil.compress(data,resBuffer);
+    ensz = PForUtil.getEncodedSize(header);
     expect = j; 
     got = PForUtil.getExcNum(header);
-    assert expect == got: expect+" expected but got "+got;
+    assert expect >= got: expect+" expected but got "+got;
 
     // there should exactly one forced exception before each
     // exception when i>0
@@ -122,11 +116,11 @@
       data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
       j++;
     }
-    ensz = PForUtil.compress(data,sz,resBuffer);
-    header = resBuffer.get(0);
+    header = PForUtil.compress(data,resBuffer);
+    ensz = PForUtil.getEncodedSize(header);
     expect = 2*(j-1)+1; 
     got = PForUtil.getExcNum(header);
-    assert expect == got: expect+" expected but got "+got;
+    assert expect >= got: expect+" expected but got "+got;
 
 
     // two forced exception  
@@ -137,11 +131,11 @@
       data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
       j++;
     }
-    ensz = PForUtil.compress(data,sz,resBuffer);
-    header = resBuffer.get(0);
+    header = PForUtil.compress(data,resBuffer);
+    ensz = PForUtil.getEncodedSize(header);
     expect = 3*(j-1)+1; 
     got = PForUtil.getExcNum(header);
-    assert expect == got: expect+" expected but got "+got;
+    assert expect >= got: expect+" expected but got "+got;
 
   }
   /**
@@ -156,7 +150,7 @@
     Integer[] buff= new Integer[sz];
     int[] data = new int[sz];
     int[] copy = new int[sz];
-    byte[] res = new byte[4+sz*8];
+    byte[] res = new byte[sz*8];
     IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
 
     int excIndex = gen.nextInt(sz/2);
@@ -176,13 +170,13 @@
     for (int i=0; i<sz; ++i)
       data[i] = buff[i];
 
-    int ensz = PForUtil.compress(data,sz,resBuffer);
+    int header = PForUtil.compress(data,resBuffer);
+    int ensz = PForUtil.getEncodedSize(header);
 
-    assert (ensz <= sz*8+4): ensz+" > "+sz*8+4;  // must not exceed the loose upperbound
-    assert (ensz >= 8);       // at least we have a header along with an exception, right?
+    assert (ensz <= sz*8): ensz+" > "+sz*8;  // must not exceed the loose upperbound
+    assert (ensz >= 4);       // at least we have an exception, right?
 
-    resBuffer.rewind();
-    PForUtil.decompress(resBuffer,copy);
+    PForUtil.decompress(resBuffer,copy,header);
 
 //    println(getHex(data,sz)+"\n");
 //    println(getHex(res,ensz)+"\n");
@@ -211,7 +205,7 @@
     int[] data = new int[sz];
     for (int i=0; i<=32; ++i) { // try to test every kinds of distribution
       double alpha=gen.nextDouble(); // rate of normal value
-      for (int j=0; j<=32; ++j) {
+      for (int j=i; j<=32; ++j) {
         createDistribution(data,sz,alpha,MASK[i],MASK[j]);
         tryCompressAndDecompress(data, sz);
       }
@@ -229,15 +223,16 @@
       data[i] = buff[i];
   }
   public void tryCompressAndDecompress(final int[] data, int sz) throws Exception {
-    byte[] res = new byte[4+sz*8];      // loosely upperbound
+    byte[] res = new byte[sz*8];      // loosely upperbound
     IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
 
-    int ensz = PForUtil.compress(data,sz,resBuffer);
+    int header = PForUtil.compress(data,resBuffer);
+    int ensz = PForUtil.getEncodedSize(header);
     
-    assert (ensz <= sz*8+4);  // must not exceed the loose upperbound
+    assert (ensz <= sz*8);  // must not exceed the loose upperbound
 
     int[] copy = new int[sz];
-    PForUtil.decompress(resBuffer,copy);
+    PForUtil.decompress(resBuffer,copy,header);
 
 //    println(getHex(data,sz)+"\n");
 //    println(getHex(res,ensz)+"\n");
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java	(working copy)
@@ -39,11 +39,7 @@
 
 public final class PForFactory extends IntStreamFactory {
 
-  /* number of ints for each block */
-  private final int blockSize;
-
   public PForFactory() {
-    this.blockSize=PForPostingsFormat.DEFAULT_BLOCK_SIZE;
   }
 
   @Override
@@ -51,7 +47,7 @@
     boolean success = false;
     IndexOutput out = dir.createOutput(fileName, context);
     try {
-      IntIndexOutput ret = new PForIndexOutput(out, blockSize);
+      IntIndexOutput ret = new PForIndexOutput(out);
       success = true;
       return ret;
     } finally {
@@ -85,11 +81,10 @@
       private final IntBuffer encodedBuffer;
 
       PForBlockReader(final IndexInput in, final int[] buffer) {
-        // upperbound for encoded value should include:
+        // upperbound for encoded value should include(here header is not buffered):
         // 1. blockSize of normal value (4x bytes); 
         // 2. blockSize of exception value (4x bytes);
-        // 3. header (4bytes);
-        this.encoded = new byte[blockSize*8+4];
+        this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
         this.in = in;
         this.buffer = buffer;
         this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
@@ -98,10 +93,11 @@
       // TODO: implement public void skipBlock() {} ?
       @Override
       public void readBlock() throws IOException {
-        final int numBytes = in.readInt();
-        assert numBytes <= blockSize*8+4;
+        final int header = in.readInt();
+        final int numBytes = PForUtil.getEncodedSize(header);
+        assert numBytes <= PForPostingsFormat.DEFAULT_BLOCK_SIZE*8;
         in.readBytes(encoded,0,numBytes);
-        PForUtil.decompress(encodedBuffer,buffer);
+        PForUtil.decompress(encodedBuffer,buffer,header);
       }
     }
 
@@ -115,16 +111,17 @@
     private final byte[] encoded;
     private final IntBuffer encodedBuffer;
 
-    PForIndexOutput(IndexOutput out, int blockSize) throws IOException {
-      super(out,blockSize);
-      this.encoded = new byte[blockSize*8+4];
+    PForIndexOutput(IndexOutput out) throws IOException {
+      super(out, PForPostingsFormat.DEFAULT_BLOCK_SIZE);
+      this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
       this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
     }
 
     @Override
     protected void flushBlock() throws IOException {
-      final int numBytes = PForUtil.compress(buffer,buffer.length,encodedBuffer);
-      out.writeInt(numBytes);
+      final int header = PForUtil.compress(buffer,encodedBuffer);
+      final int numBytes = PForUtil.getEncodedSize(header);
+      out.writeInt(header);
       out.writeBytes(encoded, numBytes);
     }
   }
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java	(working copy)
@@ -28,100 +28,76 @@
 public final class PForUtil extends ForUtil {
 
   protected static final int[] PER_EXCEPTION_SIZE = {1,2,4};
+  static final int ALL_EQUAL_HEADER = getHeader(1, 0, 0, -1, 0);
 
   /** Compress given int[] into Integer buffer, with PFor format
    *
    * @param data        uncompressed data
-   * @param size        num of ints to compress
    * @param intBuffer   integer buffer to hold compressed data
+   * @return block header
    */
-  public static int compress(final int[] data, int size, IntBuffer intBuffer) {
+  public static int compress(final int[] data, IntBuffer intBuffer) {
     /** estimate minimum compress size to determine numFrameBits */
-    int numBits=getNumBits(data,size);
-  
+    int header = getOptMetadata(data);
+    if (header == ALL_EQUAL_HEADER) {
+      compressDuplicateBlock(data,intBuffer);
+      return header;
+    }
+    int size = data.length;
     int[] excValues = new int[size];
-    int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1; 
+    int[] excPos = new int[size];
 
-    // num of exception until the last non-forced exception
-    int excNumBase = 0;          
-
     // bytes per exception
-    int excBytes = 1;
+    int excBytes = getExcBytes(header);
 
-    // bytes before exception area, e.g. header and normal area
-    int excByteOffset = 0;
+    // number of frame bits
+    int numBits = getNumBits(header);
 
-    // the max value possible for current exception pointer, 
-    // value of the first pointer is limited by header as 254
-    // (first exception ranges from -1 ~ 254)
-    long maxChainFirst = 254;
-    long maxChain = maxChainFirst + 1;  
+    // first postion of exeption, when none, it's -1
+    int excFirstPos = getFirstPos(header);
 
+    // num of exceptions, with trailing forced exception ignored
+    int excNum = getExcNum(header);
+
+    int curExcNum = 0, excLastPos = -1;
     boolean conValue, conForce, conEnd;
+    long maxChain;  
     int i=0;
+    boolean conExc;
 
-    /** estimate exceptions */
-    for (i=0; i<size; ++i) {
-      conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
-      conForce = (i >= maxChain + excLastPos);           // force exception
-      if (conValue || conForce) {
-        excValues[excNum++] = data[i];
-        if (excLastPos == -1) {
-          maxChain = 1L<<numBits; 
-          excFirstPos = i;
-        }
-        if (conValue) {
-          excLastNonForcePos = i;
-          excNumBase = excNum;
-        }
-        excLastPos = i;
-      }
-    }
-
     /** encode normal area, record exception positions */
-    excNum = 0;
     if (excFirstPos < 0) { // no exception 
       for (i=0; i<size; ++i) {
         encodeNormalValue(intBuffer,i,data[i], numBits);
       }
-      excLastPos = -1;
     } else {
-      for (i=0; i<excFirstPos; ++i) {
-        encodeNormalValue(intBuffer,i,data[i], numBits);
-      }
       maxChain = 1L<<numBits;
       excLastPos = excFirstPos;
-      excNum = i<size? 1:0;
-      for (i=excFirstPos+1; i<size; ++i) {
-        conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
+      curExcNum = 0;
+      // record exception positions
+      for (i=excFirstPos; curExcNum<excNum; ++i) {
+        conValue = ((data[i] & ~MASK[numBits]) != 0);      // value exception
         conForce = (i >= maxChain + excLastPos);           // force exception
-        conEnd = (excNum == excNumBase);                   // following forced ignored
-        if ((!conValue && !conForce) || conEnd) {
-          encodeNormalValue(intBuffer,i,data[i], numBits);
-        } else {
-          encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits); 
-          excNum++;
-          excLastPos = i;
-        }
+        conExc = conValue || conForce;
+        excValues[curExcNum] = data[i];
+        excPos[curExcNum] = i;
+        excLastPos = conExc ? i:excLastPos;
+        curExcNum = conExc ? curExcNum+1:curExcNum;
       }
-    }
-  
-    /** encode exception area */
-    for (i=0; i<excNum; ++i) {
-      if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) {
-        excBytes=2;
+      // encode normal values
+      for (i=0; i<size; ++i) {
+        encodeNormalValue(intBuffer,i,data[i] & MASK[numBits], numBits);
       }
-      if (excBytes < 4 && (excValues[i] & ~MASK[16]) != 0) {
-        excBytes=4;
+      // patch exception positions
+      for (i=0; i<excNum-1; ++i) {
+        encodeNormalValue(intBuffer, excPos[i], excPos[i+1]-excPos[i]-1, numBits);
       }
     }
-    excByteOffset = HEADER_INT_SIZE*4 + (size*numBits + 7)/8;
+    /** encode exception area */
+    int excByteOffset = (size*numBits + 7)/8;
     encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);
 
-    /** encode header */
-    encodeHeader(intBuffer, size, numBits, excNum, excFirstPos, excBytes);
-
-    return (excByteOffset + excBytes*excNum + 3)/4*4;
+    return header;
   }
   
   /** Decompress given Integer buffer into int array.
@@ -129,13 +105,10 @@
    * @param intBuffer   integer buffer to hold compressed data
    * @param data        int array to hold uncompressed data
    */
-  public static int decompress(IntBuffer intBuffer, int[] data) {
-
+  public static void decompress(IntBuffer intBuffer, int[] data, int header) {
     // since this buffer is reused at upper level, rewind first
     intBuffer.rewind();
 
-    int header = intBuffer.get();
-    int numInts = (header & MASK[8]);
     int excNum = ((header >> 8) & MASK[8]) + 1;
     int excFirstPos = ((header >> 16) & MASK[8]) - 1;
     int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
@@ -144,15 +117,8 @@
     decompressCore(intBuffer, data, numBits);
 
     patchException(intBuffer,data,excNum,excFirstPos,excBytes);
-
-    return numInts;
   }
 
-  static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
-    int header = getHeader(numInts,numBits,excNum,excFirstPos,excBytes);
-    intBuffer.put(0, header);
-  }
-
   /**
    * Encode exception values into exception area.
    * The width for each exception will be fixed as:
@@ -190,6 +156,13 @@
   }
 
   /**
+   * Save only header when the whole block equals to 1
+   */
+  static void compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
+    intBuffer.put(0,data[0]);
+  }
+
+  /**
    * Decode exception values base on the exception pointers in normal area,
    * and values in exception area.
    * As for current implementation, numInts is hardwired as 128, so the
@@ -200,7 +173,6 @@
    * In this case we should preprocess patch several heading exceptions, 
    * before calling this method.
    *
-   * TODO: blockSize is hardewired to size==128 only
    */
   public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
     if (excFirstPos == -1) {
@@ -249,84 +221,100 @@
 
   /**
    * Estimate best number of frame bits according to minimum compressed size.
-   * It will run 32 times.
    */
-  static int getNumBits(final int[] data, int size) {
-    if (isAllZero(data))
-      return 0;
-    int optBits=1;
-    int optSize=estimateCompressedSize(data,size,optBits);
-    for (int i=2; i<=32; ++i) {
-      int curSize=estimateCompressedSize(data,size,i);
+  static int getOptMetadata(final int[] data) {
+    if (isAllEqual(data)) {
+      return ALL_EQUAL_HEADER;
+    }
+    int optBits=32;
+    int optHeader=estimate(data,optBits);
+    int optSize = (optHeader & MASK[8]);
+    // for block with much large value, we prefer to 
+    // encode them directly
+    for (int i=28; i>=1; --i) {
+      int curHeader=estimate(data,i);
+      int curSize = (curHeader & MASK[8]);
       if (curSize<optSize) {
+        optHeader=curHeader;
         optSize=curSize;
-        optBits=i;
       }
     }
-    return optBits;
+    return optHeader;
   }
 
-  static boolean isAllZero(final int[] data) {
-    int len=data.length;
-    for (int i=0; i<len; i++) {
-      if (data[i] != 0) {
-        return false;
-      }
-    }
-    return true;
-  }
-
   /**
    * Iterate the whole block to get maximum exception bits, 
    * and estimate compressed size without forced exception.
-   * TODO: foresee forced exception for better estimation
+   * The return value is the header for current strategy, 
+   * which provides enough information for encoder.
    */
-  static int estimateCompressedSize(final int[] data, int size, int numBits) {
-    int totalBytes=(numBits*size+7)/8;   // always round to byte
-    int excNum=0;
-    int curExcBytes=1;
+  static int estimate(final int[] data, int numBits) {
+    int size=data.length;
+    int excFirstPos = -1, excLastPos = -1, excNumBase=0, excNum=0, excBytes=1;
+    int negMask = ~MASK[numBits];
+
+    // the max value possible for current exception pointer, 
+    // value of the first pointer is limited by header as 254
+    // (first exception ranges from -1 ~ 254)
+    long maxChainFirst=254;
+    long maxChain=maxChainFirst + 1;
+
+    // get all ints merged to test msb
+    int merge = 0;
+
+    boolean conValue, conForce;
+
     for (int i=0; i<size; ++i) {
-      if ((data[i] & ~MASK[numBits]) != 0) {   // exception
+      merge |= data[i];
+    }
+    for (int i=0; i<size; ++i) {
+      conValue = ((data[i] & negMask) != 0);    // value exception
+      conForce = (i >= maxChain + excLastPos);  // force exception
+      if (conValue || conForce) {
         excNum++;
-        if (curExcBytes<2 && (data[i] & ~MASK[8]) != 0) { // exceed 1 byte exception
-          curExcBytes=2;
+        if (excLastPos == -1) {
+          maxChain = 1<<numBits; 
+          excFirstPos = i;
         }
-        if (curExcBytes<4 && (data[i] & ~MASK[16]) != 0) { // exceed 2 byte exception
-          curExcBytes=4;
+        if (conValue) {
+          excNumBase = excNum;
         }
+        excLastPos = i;
       }
     }
-    if (curExcBytes==2) {
-      totalBytes=((totalBytes+1)/2)*2;  // round up to 2x bytes before filling exceptions
+    excNum = excNumBase;
+    if ((merge & ~MASK[16]) != 0) { // exceed 2 byte exception
+      excBytes = 4;
+    } 
+    else if ((merge & ~MASK[8]) != 0) { // exceed 1 byte exception
+      excBytes = 2;
     }
-    else if (curExcBytes==4) {
-      totalBytes=((totalBytes+3)/4)*4;  // round up to 4x bytes
+    else {
+      excBytes = 1;
     }
-    totalBytes+=excNum*curExcBytes;
 
-    return totalBytes/4*4+HEADER_INT_SIZE;  // round up to ints
+    // normal area should round to bytes, 
+    // and the whold compressed block should round to int
+    int encodedSize=((numBits*size+7)/8 + excBytes*excNum +3)/4;
+
+    return getHeader(encodedSize, numBits, excNum, excFirstPos, excBytes);
   }
 
   /** 
-   * Generate the 4 byte header, which contains (from lsb to msb):
+   * Generate the 4 byte header which contains (from lsb to msb):
    *
-   * 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
+   * 8 bits for encoded block int size (excluding header, this limits DEFAULT_BLOCK_SIZE <= 2^(8-1))
    *
    * 8 bits for exception num - 1 (when no exceptions, this is undefined)
    *
    * 8 bits for the index of the first exception + 1 (when no exception, this is 0)
    *
-   * 6 bits for num of frame bits
+   * 6 bits for num of frame bits (when 0, values in this block are all the same)
    * 2 bits for the exception code: 00: byte, 01: short, 10: int
    *
    */
-  // TODO: exception num should never be equal with uncompressed int num!!!
-  // first exception ranges from -1 ~ 255
-  // the problem is that we don't need first exception to be -1 ...
-  // it is ok to range from 0~255, and judge exception for exception num (0~255)
-  // uncompressed int num: (1~256)
-  static int getHeader(int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
-    return  (numInts-1)
+  static int getHeader(int encodedSize, int numBits, int excNum, int excFirstPos, int excBytes) {
+    return  (encodedSize)
           | (((excNum-1) & MASK[8]) << 8)
           | ((excFirstPos+1) << 16)
           | ((numBits) << 24)
@@ -337,11 +325,11 @@
   /** 
    * Expert: get metadata from header. 
    */
-  public static int getNumInts(int header) {
-    return (header & MASK[8]) + 1;
+  public static int getEncodedSize(int header) {
+    return ((header & MASK[8]))*4;
   }
   public static int getExcNum(int header) {
-    return ((header >> 8) & MASK[8]) + 1;
+    return getFirstPos(header)<0? 0 : ((header >> 8) & MASK[8]) + 1;
   }
   public static int getFirstPos(int header) {
     return ((header >> 16) & MASK[8]) - 1;
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py	(working copy)
@@ -78,7 +78,7 @@
     w('\n  // NOTE: hardwired to blockSize == 128\n\n')
 
     w('  public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n')
-    w('    Arrays.fill(output, 0);\n')
+    w('    Arrays.fill(output, compressedBuffer.get());\n')
     w('  }\n')
 
     for numFrameBits in xrange(1, 33):
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java	(working copy)
@@ -45,20 +45,17 @@
  * customized postings format plugged.
  */
 public final class PForPostingsFormat extends PostingsFormat {
-  private final int blockSize;
   private final int minBlockSize;
   private final int maxBlockSize;
   public final static int DEFAULT_BLOCK_SIZE = 128;
 
   public PForPostingsFormat() {
     super("PFor");
-    this.blockSize = DEFAULT_BLOCK_SIZE;
     this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
     this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
   }
   public PForPostingsFormat(int minBlockSize, int maxBlockSize) {
     super("PFor");
-    this.blockSize = DEFAULT_BLOCK_SIZE;
     this.minBlockSize = minBlockSize;
     assert minBlockSize > 1;
     this.maxBlockSize = maxBlockSize;
@@ -67,7 +64,7 @@
 
   @Override
   public String toString() {
-    return getName() + "(blocksize=" + blockSize + ")";
+    return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE+ ")";
   }
 
   @Override
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java	(working copy)
@@ -28,7 +28,7 @@
   // NOTE: hardwired to blockSize == 128
 
   public static void decode0(final IntBuffer compressedBuffer, final int[] output) {
-    Arrays.fill(output, 0);
+    Arrays.fill(output, compressedBuffer.get());
   }
   public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
     final int numFrameBits = 1;
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java	(working copy)
@@ -39,11 +39,7 @@
 
 public final class ForFactory extends IntStreamFactory {
 
-  /* number of ints for each block */
-  private final int blockSize; 
-
   public ForFactory() {
-    this.blockSize = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
   }
 
   @Override
@@ -51,7 +47,7 @@
     boolean success = false;
     IndexOutput out = dir.createOutput(fileName, context);
     try {
-      IntIndexOutput ret = new ForIndexOutput(out, blockSize);
+      IntIndexOutput ret = new ForIndexOutput(out);
       success = true;
       return ret;
     } finally {
@@ -85,10 +81,9 @@
       private final IntBuffer encodedBuffer;
 
       ForBlockReader(final IndexInput in, final int[] buffer) {
-        // upperbound for encoded value should include:
-        // 1. blockSize of normal value when numFrameBits=32(4x bytes); 
-        // 2. header (4bytes);
-        this.encoded = new byte[blockSize*4+4]; 
+        // upperbound for encoded value should include(here header is not buffered):
+        // blockSize of normal value when numFrameBits=32(4x bytes); 
+        this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4]; 
         this.in = in;
         this.buffer = buffer;
         this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
@@ -97,10 +92,11 @@
       // TODO: implement public void skipBlock() {} ?
       @Override
       public void readBlock() throws IOException {
-        final int numBytes = in.readInt();
-        assert numBytes <= blockSize*4+4;
+        final int header = in.readInt();
+        final int numBytes = ForUtil.getEncodedSize(header);
+        assert numBytes <= ForPostingsFormat.DEFAULT_BLOCK_SIZE*4;
         in.readBytes(encoded,0,numBytes);
-        ForUtil.decompress(encodedBuffer,buffer);
+        ForUtil.decompress(encodedBuffer,buffer,header);
       }
     }
 
@@ -114,16 +110,17 @@
     private final byte[] encoded;
     private final IntBuffer encodedBuffer;
 
-    ForIndexOutput(IndexOutput out, int blockSize) throws IOException {
-      super(out,blockSize);
-      this.encoded = new byte[blockSize*4+4];
+    ForIndexOutput(IndexOutput out) throws IOException {
+      super(out,ForPostingsFormat.DEFAULT_BLOCK_SIZE);
+      this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4];
       this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
     }
 
     @Override
     protected void flushBlock() throws IOException {
-      final int numBytes = ForUtil.compress(buffer,buffer.length,encodedBuffer);
-      out.writeInt(numBytes);
+      final int header = ForUtil.compress(buffer,encodedBuffer);
+      final int numBytes = ForUtil.getEncodedSize(header);
+      out.writeInt(header);
       out.writeBytes(encoded, numBytes);
     }
   }
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java	(working copy)
@@ -25,7 +25,6 @@
  * which is determined by the max value in this block.
  */
 public class ForUtil {
-  public static final int HEADER_INT_SIZE=1;
   protected static final int[] MASK = {   0x00000000,
     0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
     0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
@@ -33,41 +32,51 @@
     0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
     0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
     0x7fffffff, 0xffffffff};
+    static final int ALL_EQUAL_HEADER = getHeader(1, 0);
 
   /** Compress given int[] into Integer buffer, with For format
    *
    * @param data        uncompressed data
    * @param size        num of ints to compress
    * @param intBuffer   integer buffer to hold compressed data
+   * @return encoded block byte size
    */
-  public static int compress(final int[] data, int size, IntBuffer intBuffer) {
-    int numBits=getNumBits(data,size);
-  
+  public static int compress(final int[] data, IntBuffer intBuffer) {
+    int numBits=getNumBits(data);
+    if (numBits == 0) {
+      compressDuplicateBlock(data,intBuffer);
+      return ALL_EQUAL_HEADER;
+    }
+ 
+    int size=data.length;
+    int encodedSize = (size*numBits+31)/32;
+
     for (int i=0; i<size; ++i) {
       encodeNormalValue(intBuffer,i,data[i], numBits);
     }
-    encodeHeader(intBuffer, size, numBits);
 
-    return (HEADER_INT_SIZE+(size*numBits+31)/32)*4;
+    return getHeader(encodedSize, numBits);
   }
 
+  /**
+   * Save only one int when the whole block equals to 1
+   */
+  static void compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
+    intBuffer.put(0,data[0]);
+  }
+
   /** Decompress given Integer buffer into int array.
    *
    * @param intBuffer   integer buffer to hold compressed data
    * @param data        int array to hold uncompressed data
    */
-  public static int decompress(IntBuffer intBuffer, int[] data) {
-
+  public static void decompress(IntBuffer intBuffer, int[] data, int header) {
     // since this buffer is reused at upper level, rewind first
     intBuffer.rewind();
 
-    int header = intBuffer.get();
-    int numInts = (header & MASK[8]) + 1;
     int numBits = ((header >> 8) & MASK[6]);
 
     decompressCore(intBuffer, data, numBits);
-
-    return numInts;
   }
 
   /**
@@ -117,15 +126,10 @@
     }
   }
 
-  static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits) {
-    int header = getHeader(numInts,numBits);
-    intBuffer.put(0, header);
-  }
-
   static void encodeNormalValue(IntBuffer intBuffer, int pos, int value, int numBits) {
     final int globalBitPos = numBits*pos;           // position in bit stream
     final int localBitPos = globalBitPos & 31;      // position inside an int
-    int intPos = HEADER_INT_SIZE + globalBitPos/32; // which integer to locate 
+    int intPos = globalBitPos/32; // which integer to locate 
     setBufferIntBits(intBuffer, intPos, localBitPos, numBits, value);
     if ((localBitPos + numBits) > 32) { // value does not fit in this int, fill tail
       setBufferIntBits(intBuffer, intPos+1, 0, 
@@ -145,8 +149,12 @@
   /**
    * Estimate best num of frame bits according to the largest value.
    */
-  static int getNumBits(final int[] data, int size) {
-    int optBits=0;
+  static int getNumBits(final int[] data) {
+    if (isAllEqual(data)) {
+      return 0;
+    }
+    int size=data.length;
+    int optBits=1;
     for (int i=0; i<size; ++i) {
       while ((data[i] & ~MASK[optBits]) != 0) {
         optBits++;
@@ -155,16 +163,37 @@
     return optBits;
   }
 
+  protected static boolean isAllEqual(final int[] data) {
+    int len = data.length;
+    int v = data[0];
+    for (int i=1; i<len; i++) {
+      if (data[i] != v) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   /** 
    * Generate the 4 byte header, which contains (from lsb to msb):
    *
-   * - 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
-   * - 6 bits for num of frame bits
-   * - other bits unused
+   * 8 bits for encoded block int size (excluded header, this limits DEFAULT_BLOCK_SIZE <= 2^8)
+   * 6 bits for num of frame bits (when 0, values in this block are all the same)
+   * other bits unused
    *
    */
-  static int getHeader(int numInts, int numBits) {
-    return  (numInts-1)
+  static int getHeader(int encodedSize, int numBits) {
+    return  (encodedSize)
           | ((numBits) << 8);
   }
+
+  /** 
+   * Expert: get metadata from header. 
+   */
+  public static int getEncodedSize(int header) {
+    return ((header & MASK[8]))*4;
+  }
+  public static int getNumBits(int header) {
+    return ((header >> 8) & MASK[6]);
+  }
 }
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java	(revision 1361471)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java	(working copy)
@@ -17,48 +17,46 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.util.Set;
-import java.io.IOException;
 
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.codecs.BlockTreeTermsReader;
+import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.TermsIndexReaderBase;
-import org.apache.lucene.codecs.TermsIndexWriterBase;
 import org.apache.lucene.codecs.FixedGapTermsIndexReader;
 import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
 import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.TermsIndexReaderBase;
+import org.apache.lucene.codecs.TermsIndexWriterBase;
 import org.apache.lucene.codecs.sep.SepPostingsReader;
 import org.apache.lucene.codecs.sep.SepPostingsWriter;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Pass ForFactory to a PostingsWriter/ReaderBase, and get 
  * customized postings format plugged.
  */
 public final class ForPostingsFormat extends PostingsFormat {
-  private final int blockSize;
   private final int minBlockSize;
   private final int maxBlockSize;
   public final static int DEFAULT_BLOCK_SIZE = 128;
 
   public ForPostingsFormat() {
     super("For");
-    this.blockSize = DEFAULT_BLOCK_SIZE;
     this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
     this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
   }
 
   public ForPostingsFormat(int minBlockSize, int maxBlockSize) {
     super("For");
-    this.blockSize = DEFAULT_BLOCK_SIZE;
     this.minBlockSize = minBlockSize;
     assert minBlockSize > 1;
     this.maxBlockSize = maxBlockSize;
@@ -67,7 +65,7 @@
 
   @Override
   public String toString() {
-    return getName() + "(blocksize=" + blockSize + ")";
+    return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE + ")";
   }
 
   @Override
Index: build.xml
===================================================================
--- build.xml	(revision 1361471)
+++ build.xml	(working copy)
@@ -74,13 +74,19 @@
 
   <target name="compile" description="Compile Lucene and Solr">
     <sequential>
-
       <subant target="compile" inheritall="false" failonerror="true">
         <fileset dir="lucene" includes="build.xml" />
         <fileset dir="solr" includes="build.xml" />
       </subant>
     </sequential>
   </target>
+  <target name="compile-core" description="Compile">
+    <sequential>
+      <subant target="compile-core" inheritall="false" failonerror="true">
+        <fileset dir="lucene" includes="build.xml" />
+      </subant>
+    </sequential>
+  </target>
 
   <property name="version" value="5.0-SNAPSHOT"/>
   <property name="maven-build-dir" value="maven-build"/>
