Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java	(revision 1371251)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java	(working copy)
@@ -215,9 +215,9 @@
   }
 
   private void writeBlock(int[] buffer, IndexOutput out) throws IOException {
-    final int header = ForUtil.compress(buffer, encodedBuffer);
+    final int header = PForUtil.compress(buffer, encodedBuffer); // header packs numBits plus exception count, first position and width
     out.writeVInt(header);
-    out.writeBytes(encoded, ForUtil.getEncodedSize(header));
+    out.writeBytes(encoded, PForUtil.getEncodedSize(header)); // payload length is derived from the header alone
   }
 
   @Override
Index: lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java	(revision 1371251)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java	(working copy)
@@ -24,7 +24,7 @@
  * Encode all values in normal area with fixed bit width, 
  * which is determined by the max value in this block.
  */
-public final class ForUtil {
+public class ForUtil {
   protected static final int[] MASK = {   0x00000000,
     0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
     0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
@@ -78,7 +78,7 @@
     decompressCore(intBuffer, data, numBits);
   }
 
-  public static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) {
+  static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) {
     switch(numBits) {
       case 0: PackedIntsDecompress.decode0(intBuffer, data); break;
       case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
@@ -155,7 +155,7 @@
   }
 
   // nocommit: we must have a util function for this, hmm?
-  protected static boolean isAllEqual(final int[] data) {
+  static boolean isAllEqual(final int[] data) {
     int len = data.length;
     int v = data[0];
     for (int i=1; i<len; i++) {
Index: lucene/core/src/java/org/apache/lucene/codecs/block/PForUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/PForUtil.java	(revision 0)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/PForUtil.java	(working copy)
@@ -0,0 +1,352 @@
+package org.apache.lucene.codecs.block;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.nio.IntBuffer;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE;
+
+/**
+ * PFor encoding: small values and exception pointers are stored in the normal area,
+ * while values too large for the frame width are stored in the exception area.
+ * The size of an exception is chosen per block: 1, 2, or 4 bytes.
+ */
+public final class PForUtil extends ForUtil {
+
+  protected static final int[] PER_EXCEPTION_SIZE = {1,2,4}; // bytes per exception, indexed by the 2-bit exception code in the header
+
+  /** Compresses one block of ints into {@code intBuffer} using the PFor layout.
+   * A block whose values are all equal is delegated to compressDuplicateBlock.
+   * @param data        uncompressed values of one block
+   * @param intBuffer   buffer receiving the normal area followed by the exception area
+   * @return block header, as packed by getHeader
+   */
+  public static int compress(final int[] data, IntBuffer intBuffer) {
+    /** choose the frame width with the smallest estimated size; 0 means all values are equal */
+    int numBits=getNumBits(data);
+    if (numBits == 0) {
+      return compressDuplicateBlock(data,intBuffer);
+    }
+ 
+    int size = data.length;
+    int[] excValues = new int[size];
+    int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1; 
+
+    // number of exceptions up to and including the last value (non-forced) exception
+    int excNumBase = 0;          
+
+    // bytes per exception; widened to 2 or 4 below if any exception value needs it
+    int excBytes = 1;
+
+    // byte offset of the exception area in intBuffer, i.e. the size of the normal area
+    int excByteOffset = 0;
+
+    // maxChain bounds the distance to the next exception before one is forced.
+    // The first exception position is stored in the header (8 bits, as position + 1),
+    // so it may range from -1 (no exception) to 254
+    long maxChainFirst = 254;
+    long maxChain = maxChainFirst + 1;  
+
+    boolean conValue, conForce, conEnd;
+    int i=0;
+
+    /** pass 1: collect exception values; remember the first and the last non-forced one */
+    for (i=0; i<size; ++i) {
+      conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception: does not fit in numBits
+      conForce = (i >= maxChain + excLastPos);           // forced exception: gap to the previous one is too large
+      if (conValue || conForce) {
+        excValues[excNum++] = data[i];
+        if (excLastPos == -1) {
+          maxChain = 1L<<numBits; // after the first exception, gaps are stored in numBits-wide slots
+          excFirstPos = i;
+        }
+        if (conValue) {
+          excLastNonForcePos = i; // NOTE(review): assigned here but not read anywhere in this method
+          excNumBase = excNum;
+        }
+        excLastPos = i;
+      }
+    }
+
+    /** pass 2: encode the normal area; an exception slot holds the gap to the next exception */
+    excNum = 0;
+    if (excFirstPos < 0) { // no exception 
+      for (i=0; i<size; ++i) {
+        encodeNormalValue(intBuffer,i,data[i], numBits);
+      }
+      excLastPos = -1;
+    } else {
+      for (i=0; i<excFirstPos; ++i) { // values before the first exception are encoded as ordinary values
+        encodeNormalValue(intBuffer,i,data[i], numBits);
+      }
+      maxChain = 1L<<numBits;
+      excLastPos = excFirstPos;
+      excNum = i<size? 1:0; // counts the first exception
+      for (i=excFirstPos+1; i<size; ++i) {
+        conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
+        conForce = (i >= maxChain + excLastPos);           // force exception
+        conEnd = (excNum == excNumBase);                   // trailing forced exceptions are dropped
+        if ((!conValue && !conForce) || conEnd) {
+          encodeNormalValue(intBuffer, i, data[i], numBits);
+        } else {
+          encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits); // gap goes into the previous exception's slot
+          excNum++;
+          excLastPos = i;
+        }
+      }
+    }
+    // NOTE(review): the slot of the last exception is never written above - confirm this is intended
+    /** choose the exception width, then write the exception area */
+    for (i=0; i<excNum; ++i) {
+      if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) {
+        excBytes=2;
+      }
+      if (excBytes < 4 && (excValues[i] & ~MASK[16]) != 0) {
+        excBytes=4;
+      }
+    }
+    excByteOffset = (size*numBits + 7)/8;
+    encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);
+
+    return getHeader(numBits, excNum, excFirstPos, excBytes);
+  }
+  
+  /** Decodes one block from {@code intBuffer} into {@code data}.
+   *
+   * @param intBuffer   buffer holding the compressed block; it is rewound before reading
+   * @param data        destination array for the decoded values
+   * @param header      block header, as packed by getHeader
+   */
+  public static void decompress(IntBuffer intBuffer, int[] data, int header) {
+    // the buffer is reused by the caller, so always start reading from the beginning
+    intBuffer.rewind();
+
+    final int frameBits = getNumBits(header);
+    final int firstExc = getFirstPos(header);
+    final int excWidth = getExcBytes(header);
+    final int excCount = (header & MASK[8]) + 1;  // not meaningful when firstExc == -1
+
+    decompressCore(intBuffer, data, frameBits);   // normal area; exception slots still hold chain pointers
+    patchException(intBuffer, data, excCount, firstExc, excWidth);
+  }
+
+  /**
+   * Writes {@code num} exception values into the exception area, which starts
+   * {@code byteOffset} bytes into {@code intBuffer} (rounded up to the value width).
+   * Every value takes {@code perbytes} bytes (1, 2 or 4); other widths write nothing.
+   */
+  static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) {
+    if (num == 0)
+      return;
+    if (perbytes == 1) { // 1 byte per value, packed four to an int
+      int curBytePos = byteOffset;
+      for (int i=0; i<num; ++i) {
+        int curIntPos = curBytePos / 4;
+        setBufferIntBits(intBuffer, curIntPos, (curBytePos & 3)*8, 8, values[i]);
+        curBytePos++;
+      }
+    } else if (perbytes == 2) { // 2 bytes per value, packed two to an int
+      int shortOffset = (byteOffset+1)/2; // offset in 16-bit units, rounded up
+      int curIntPos = shortOffset/2;
+      int i=0;
+      if ((shortOffset & 1) == 1) {  // odd start: write one value alone so the remaining ones are int-aligned
+        setBufferIntBits(intBuffer, curIntPos++, 16, 16, values[i++]); 
+      }
+      for (; i<num-1; i+=2) {
+        intBuffer.put(curIntPos++, (values[i+1]<<16) | values[i]); // earlier value in the low 16 bits
+      }
+      if (i<num) {
+        intBuffer.put(curIntPos, values[i]); // leftover value alone in the final int (whole-int put, so the upper bits are overwritten too)
+      }
+    } else if (perbytes == 4) { // one int per value
+      int curIntPos = (byteOffset+3) / 4; // round up to the next int boundary
+      for (int i=0; i<num; ++i) {
+        intBuffer.put(curIntPos++, values[i]);
+      }
+    }
+  }
+
+  /**
+   * Encodes a block whose values are all equal (see compress): stores data[0] and returns a header with numBits 0.
+   */
+  static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
+    intBuffer.put(0,data[0]); // the single repeated value goes into the first int of the buffer
+    return getHeader(0, 0, -1, 0);
+  }
+
+  /**
+   * Replaces the exception pointers left in {@code data} by the normal-area decode
+   * with the real values read from the exception area.
+   * Values are read with relative gets, so {@code intBuffer} must already be
+   * positioned at the first int of the exception area. The original note states that
+   * the block size is hardwired to 128, which keeps the normal area int-aligned.
+   * If numFrameBits * numInts % 32 != 0 the normal area would share an int with
+   * the exception area; this method does not handle that case, and the leading
+   * exceptions would have to be patched before calling it.
+   *
+   * Does nothing when {@code excFirstPos} is -1 (no exceptions in the block).
+   */
+  public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
+    if (excFirstPos == -1) {
+      return;
+    }
+    int curPos=excFirstPos;   // position in data of the exception being patched
+    int i,j;
+
+    if (excBytes == 1) { // each exception consumes 1 byte
+      for (i=0; i+3<excNum; i+=4) {   // full ints: four exceptions each, lowest byte first
+        final int curInt = intBuffer.get();
+        curPos = patch(data, curPos, (curInt) & MASK[8]);
+        curPos = patch(data, curPos, (curInt >>> 8)  & MASK[8]);
+        curPos = patch(data, curPos, (curInt >>> 16) & MASK[8]);
+        curPos = patch(data, curPos, (curInt >>> 24) & MASK[8]);
+      }
+      if (i<excNum) {   // one to three exceptions left in a final, partly used int
+        final int curInt = intBuffer.get();
+        for (j=0; j<32 && i<excNum; j+=8,i++) {
+          curPos = patch(data, curPos, (curInt >>> j) & MASK[8]);
+        }
+      }
+    } else if (excBytes == 2) { // each exception consumes 2 bytes
+      for (i=0; i+1<excNum; i+=2) {   // full ints: two exceptions each, low half first
+        final int curInt = intBuffer.get();
+        curPos = patch(data, curPos, (curInt) & MASK[16]);
+        curPos = patch(data, curPos, (curInt >>> 16) & MASK[16]);
+      }
+      if (i<excNum) {   // odd count: the last exception sits in the low half of one more int
+        final int curInt = intBuffer.get();
+        curPos = patch(data, curPos, (curInt) & MASK[16]);
+      }
+    } else if (excBytes == 4) { // each exception consumes 4 bytes
+      for (i=0; i<excNum; i++) {
+        curPos = patch(data, curPos, intBuffer.get());
+      }
+    }
+  }
+
+  static int patch(int[]data, int pos, int value) {
+    final int gap = data[pos];   // before patching, the slot holds (next exception position - pos - 1)
+    data[pos] = value;           // replace the pointer with the real exception value
+    assert pos + gap + 1 > pos;  // the chain must always move forward
+    return pos + gap + 1;        // position of the next exception in the chain
+  }
+
+  /**
+   * Picks the frame width with the smallest estimated compressed size; returns 0 if all values are equal.
+   * Candidates are 32 and then 28 down to 1 (29..31 are never tried); on a tie the wider candidate wins.
+   */
+  static int getNumBits(final int[] data) {
+    if (isAllEqual(data)) {
+      return 0;
+    }
+    int bestBits = 32;
+    int bestSize = estimateCompressedSize(data, bestBits);
+    for (int candidate = 28; candidate > 0; candidate--) {
+      final int size = estimateCompressedSize(data, candidate);
+      if (size < bestSize) {   // strict comparison keeps the wider width on ties
+        bestSize = size;
+        bestBits = candidate;
+      }
+    }
+    return bestBits;
+  }
+
+  /**
+   * Estimates the encoded size in bytes of one block for the frame width {@code numBits}:
+   * the normal area plus one fixed-width slot per value exception. Forced exceptions
+   * are ignored (the original note says wiki data tests justify this when larger
+   * numBits are preferred), so this is a heuristic, not the exact size.
+   *
+   * @return estimated size in bytes, rounded up to a multiple of 4
+   */
+  static int estimateCompressedSize(final int[] data, int numBits) {
+    final int size = data.length;
+    int totalBytes = (numBits*size + 7)/8;   // normal area, rounded up to whole bytes
+    int excNum = 0;
+    int merge = 0;                           // OR of all values: shows the widest value present
+    for (int i=0; i<size; ++i) {
+      merge |= data[i];
+      if ((data[i] & ~MASK[numBits]) != 0) { // does not fit in numBits: value exception
+        excNum++;
+      }
+    }
+
+    final int excBytes;
+    if ((merge & ~MASK[16]) != 0) {          // some value needs more than 2 bytes
+      excBytes = 4;
+    } else if ((merge & ~MASK[8]) != 0) {    // some value needs more than 1 byte
+      excBytes = 2;
+    } else {
+      excBytes = 1;
+    }
+    // the exception area starts on a boundary of the exception width
+    totalBytes = (totalBytes + excBytes - 1)/excBytes*excBytes;
+    totalBytes += excNum*excBytes;
+
+    return (totalBytes + 3)/4*4;  // round UP to whole ints (was totalBytes/4*4, which rounded down)
+  }
+
+  /** 
+   * Packs the block metadata into a 32-bit header. Layout, from lsb to msb:
+   *
+   * bits 0-7:   number of exceptions - 1 (not meaningful when there is no exception;
+   *             must be widened if the block size ever exceeds 256)
+   *
+   * bits 8-15:  index of the first exception + 1 (0 when there is no exception)
+   *
+   * bits 16-21: number of frame bits (0 means every value in the block is the same)
+   *
+   * bits 24-25: exception width code, excBytes/2: 0 = 1 byte, 1 = 2 bytes, 2 = 4 bytes
+   *
+   * remaining bits are unused
+   *
+   */
+  static int getHeader(int numBits, int excNum, int excFirstPos, int excBytes) {
+    final int countField = (excNum - 1) & MASK[8];
+    final int firstField = (excFirstPos + 1) << 8;
+    final int widthField = (excBytes / 2) << 24;
+    return countField | firstField | (numBits << 16) | widthField;
+  }
+
+
+  /** Expert: returns the number of payload bytes following a block header (a multiple of 4). */
+  public static int getEncodedSize(int header) {
+    final int frameBits = getNumBits(header);
+    final int excWidth = getExcBytes(header);
+    final int excCount = getExcNum(header);
+    if (frameBits == 0) {
+      return 4;  // all-equal block: only the repeated value is stored
+    }
+    final int normalBytes = (frameBits * BLOCK_SIZE + 7) / 8;
+    return (normalBytes + excWidth * excCount + 3) / 4 * 4;  // round up to whole ints
+  }
+  public static int getExcNum(int header) { // exception count; 0 when the header records no first exception
+    return getFirstPos(header) == -1? 0 : (header & MASK[8]) + 1;
+  }
+  public static int getFirstPos(int header) { // index of the first exception, or -1 when there is none
+    return ((header >> 8) & MASK[8]) - 1;
+  }
+  public static int getExcBytes(int header) { // bytes per exception, looked up from the 2-bit width code
+    return PER_EXCEPTION_SIZE[(header >> 24) & MASK[2]];
+  }
+  public static int getNumBits(int header) { // frame width stored in header bits 16-21
+    return ((header >> 16) & MASK[6]);
+  }
+}
Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java	(revision 1371251)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java	(working copy)
@@ -139,13 +139,13 @@
 
   static void readBlock(IndexInput in, byte[] encoded, IntBuffer encodedBuffer, int[] buffer) throws IOException {
     int header = in.readVInt();
-    in.readBytes(encoded, 0, ForUtil.getEncodedSize(header));
-    ForUtil.decompress(encodedBuffer, buffer, header);
+    in.readBytes(encoded, 0, PForUtil.getEncodedSize(header)); // payload length is derived from the header alone
+    PForUtil.decompress(encodedBuffer, buffer, header); // presumably encodedBuffer is a view over encoded - TODO confirm
   }
 
   static void skipBlock(IndexInput in) throws IOException {
     int header = in.readVInt();
-    in.seek(in.getFilePointer() + ForUtil.getEncodedSize(header));
+    in.seek(in.getFilePointer() + PForUtil.getEncodedSize(header)); // jump over the payload without decoding it
   }
 
   // Must keep final because we do non-standard clone
