Index: lucene/core/src/test/org/apache/lucene/codecs/pfor/TestForUtil.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/codecs/pfor/TestForUtil.java	(revision 1352207)
+++ lucene/core/src/test/org/apache/lucene/codecs/pfor/TestForUtil.java	(working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.pfor;
+package org.apache.lucene;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,6 +23,7 @@
 import org.apache.lucene.codecs.pfor.*;
 import org.apache.lucene.util.LuceneTestCase;
 
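+// NOTE: this test currently verifies nothing: the ForUtil.decompress calls and the cmp assertions are commented out because that method was unfolded.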
 public class TestForUtil extends LuceneTestCase {
   static final int[] MASK={ 0x00000000,
     0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
@@ -43,7 +44,6 @@
     tryForcedException();
     tryAllDistribution();
   }
-
   // Test correctness of ignored forced exception
   public void tryForcedException() throws Exception {
     int sz=128;
@@ -67,13 +67,13 @@
       ensz=sz*8+4;
     }
     resBuffer.rewind();
-    ForUtil.decompress(resBuffer,copy);
+//    ForUtil.decompress(resBuffer,copy); 
 
 //    println(getHex(data,sz)+"\n");
 //    println(getHex(res,ensz)+"\n");
 //    println(getHex(copy,sz)+"\n");
     
-    assert cmp(data,sz,copy,sz)==true;
+//    assert cmp(data,sz,copy,sz)==true;
   }
 
   // Test correctness of compressing and decompressing
@@ -107,13 +107,13 @@
     }
     int[] copy = new int[sz];
 
-    ForUtil.decompress(resBuffer,copy);
+//    ForUtil.decompress(resBuffer,copy);
 
 //    println(getHex(data,sz)+"\n");
 //    println(getHex(res,ensz)+"\n");
 //    println(getHex(copy,sz)+"\n");
 
-    assert cmp(data,sz,copy,sz)==true;
+//    assert cmp(data,sz,copy,sz)==true;
   }
   public boolean cmp(int[] a, int sza, int[] b, int szb) {
     if (sza!=szb)
@@ -126,7 +126,7 @@
     }
     return true;
   }
-  public static String getHex( byte [] raw, int sz ) {
+  public static String getHex(final byte [] raw, int sz ) {
     final String HEXES = "0123456789ABCDEF";
     if ( raw == null ) {
       return null;
@@ -142,7 +142,7 @@
     }
     return hex.toString();
   }
-  public static String getHex( int [] raw, int sz ) {
+  public static String getHex(final int [] raw, int sz ) {
     if ( raw == null ) {
       return null;
     }
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForDecompressImpl.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForDecompressImpl.java	(revision 1352207)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForDecompressImpl.java	(working copy)
@@ -24,12 +24,13 @@
   // nocommit: assess perf of this to see if specializing is really needed
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode1(final int[] input, final int[] output) {
     final int numFrameBits = 1;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 1) & mask;
       output[2 + outputOffset] = (intValue0 >>> 2) & mask;
@@ -63,17 +64,19 @@
       output[30 + outputOffset] = (intValue0 >>> 30) & mask;
       output[31 + outputOffset] = intValue0 >>> 31;
       outputOffset += 32;
+      inputOffset += 1;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode2(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode2(final int[] input, final int[] output) {
     final int numFrameBits = 2;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 2) & mask;
       output[2 + outputOffset] = (intValue0 >>> 4) & mask;
@@ -107,18 +110,20 @@
       output[30 + outputOffset] = (intValue1 >>> 28) & mask;
       output[31 + outputOffset] = intValue1 >>> 30;
       outputOffset += 32;
+      inputOffset += 2;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode3(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode3(final int[] input, final int[] output) {
     final int numFrameBits = 3;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 3) & mask;
       output[2 + outputOffset] = (intValue0 >>> 6) & mask;
@@ -152,19 +157,21 @@
       output[30 + outputOffset] = (intValue2 >>> 26) & mask;
       output[31 + outputOffset] = intValue2 >>> 29;
       outputOffset += 32;
+      inputOffset += 3;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode4(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode4(final int[] input, final int[] output) {
     final int numFrameBits = 4;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 4) & mask;
       output[2 + outputOffset] = (intValue0 >>> 8) & mask;
@@ -198,20 +205,22 @@
       output[30 + outputOffset] = (intValue3 >>> 24) & mask;
       output[31 + outputOffset] = intValue3 >>> 28;
       outputOffset += 32;
+      inputOffset += 4;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode5(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode5(final int[] input, final int[] output) {
     final int numFrameBits = 5;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 5) & mask;
       output[2 + outputOffset] = (intValue0 >>> 10) & mask;
@@ -245,21 +254,23 @@
       output[30 + outputOffset] = (intValue4 >>> 22) & mask;
       output[31 + outputOffset] = intValue4 >>> 27;
       outputOffset += 32;
+      inputOffset += 5;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode6(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode6(final int[] input, final int[] output) {
     final int numFrameBits = 6;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 6) & mask;
       output[2 + outputOffset] = (intValue0 >>> 12) & mask;
@@ -293,22 +304,24 @@
       output[30 + outputOffset] = (intValue5 >>> 20) & mask;
       output[31 + outputOffset] = intValue5 >>> 26;
       outputOffset += 32;
+      inputOffset += 6;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode7(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode7(final int[] input, final int[] output) {
     final int numFrameBits = 7;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 7) & mask;
       output[2 + outputOffset] = (intValue0 >>> 14) & mask;
@@ -342,23 +355,25 @@
       output[30 + outputOffset] = (intValue6 >>> 18) & mask;
       output[31 + outputOffset] = intValue6 >>> 25;
       outputOffset += 32;
+      inputOffset += 7;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode8(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode8(final int[] input, final int[] output) {
     final int numFrameBits = 8;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 8) & mask;
       output[2 + outputOffset] = (intValue0 >>> 16) & mask;
@@ -392,24 +407,26 @@
       output[30 + outputOffset] = (intValue7 >>> 16) & mask;
       output[31 + outputOffset] = intValue7 >>> 24;
       outputOffset += 32;
+      inputOffset += 8;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode9(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode9(final int[] input, final int[] output) {
     final int numFrameBits = 9;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 9) & mask;
       output[2 + outputOffset] = (intValue0 >>> 18) & mask;
@@ -443,25 +460,27 @@
       output[30 + outputOffset] = (intValue8 >>> 14) & mask;
       output[31 + outputOffset] = intValue8 >>> 23;
       outputOffset += 32;
+      inputOffset += 9;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode10(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode10(final int[] input, final int[] output) {
     final int numFrameBits = 10;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 10) & mask;
       output[2 + outputOffset] = (intValue0 >>> 20) & mask;
@@ -495,26 +514,28 @@
       output[30 + outputOffset] = (intValue9 >>> 12) & mask;
       output[31 + outputOffset] = intValue9 >>> 22;
       outputOffset += 32;
+      inputOffset += 10;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode11(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode11(final int[] input, final int[] output) {
     final int numFrameBits = 11;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 11) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 22) | (intValue1 << 10)) & mask;
@@ -548,27 +569,29 @@
       output[30 + outputOffset] = (intValue10 >>> 10) & mask;
       output[31 + outputOffset] = intValue10 >>> 21;
       outputOffset += 32;
+      inputOffset += 11;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode12(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode12(final int[] input, final int[] output) {
     final int numFrameBits = 12;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 12) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 24) | (intValue1 << 8)) & mask;
@@ -602,28 +625,30 @@
       output[30 + outputOffset] = (intValue11 >>> 8) & mask;
       output[31 + outputOffset] = intValue11 >>> 20;
       outputOffset += 32;
+      inputOffset += 12;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode13(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode13(final int[] input, final int[] output) {
     final int numFrameBits = 13;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 13) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 26) | (intValue1 << 6)) & mask;
@@ -657,29 +682,31 @@
       output[30 + outputOffset] = (intValue12 >>> 6) & mask;
       output[31 + outputOffset] = intValue12 >>> 19;
       outputOffset += 32;
+      inputOffset += 13;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode14(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode14(final int[] input, final int[] output) {
     final int numFrameBits = 14;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 14) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 28) | (intValue1 << 4)) & mask;
@@ -713,30 +740,32 @@
       output[30 + outputOffset] = (intValue13 >>> 4) & mask;
       output[31 + outputOffset] = intValue13 >>> 18;
       outputOffset += 32;
+      inputOffset += 14;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode15(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode15(final int[] input, final int[] output) {
     final int numFrameBits = 15;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 15) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask;
@@ -770,31 +799,33 @@
       output[30 + outputOffset] = (intValue14 >>> 2) & mask;
       output[31 + outputOffset] = intValue14 >>> 17;
       outputOffset += 32;
+      inputOffset += 15;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode16(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode16(final int[] input, final int[] output) {
     final int numFrameBits = 16;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = intValue0 >>> 16;
       output[2 + outputOffset] = intValue1 & mask;
@@ -828,32 +859,34 @@
       output[30 + outputOffset] = intValue15 & mask;
       output[31 + outputOffset] = intValue15 >>> 16;
       outputOffset += 32;
+      inputOffset += 16;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode17(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode17(final int[] input, final int[] output) {
     final int numFrameBits = 17;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 17) | (intValue1 << 15)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 2) & mask;
@@ -887,33 +920,35 @@
       output[30 + outputOffset] = ((intValue15 >>> 30) | (intValue16 << 2)) & mask;
       output[31 + outputOffset] = intValue16 >>> 15;
       outputOffset += 32;
+      inputOffset += 17;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode18(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode18(final int[] input, final int[] output) {
     final int numFrameBits = 18;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 18) | (intValue1 << 14)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 4) & mask;
@@ -947,34 +982,36 @@
       output[30 + outputOffset] = ((intValue16 >>> 28) | (intValue17 << 4)) & mask;
       output[31 + outputOffset] = intValue17 >>> 14;
       outputOffset += 32;
+      inputOffset += 18;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode19(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode19(final int[] input, final int[] output) {
     final int numFrameBits = 19;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 19) | (intValue1 << 13)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 6) & mask;
@@ -1008,35 +1045,37 @@
       output[30 + outputOffset] = ((intValue17 >>> 26) | (intValue18 << 6)) & mask;
       output[31 + outputOffset] = intValue18 >>> 13;
       outputOffset += 32;
+      inputOffset += 19;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode20(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode20(final int[] input, final int[] output) {
     final int numFrameBits = 20;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 20) | (intValue1 << 12)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 8) & mask;
@@ -1070,36 +1109,38 @@
       output[30 + outputOffset] = ((intValue18 >>> 24) | (intValue19 << 8)) & mask;
       output[31 + outputOffset] = intValue19 >>> 12;
       outputOffset += 32;
+      inputOffset += 20;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode21(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode21(final int[] input, final int[] output) {
     final int numFrameBits = 21;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 21) | (intValue1 << 11)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 10) & mask;
@@ -1133,37 +1174,39 @@
       output[30 + outputOffset] = ((intValue19 >>> 22) | (intValue20 << 10)) & mask;
       output[31 + outputOffset] = intValue20 >>> 11;
       outputOffset += 32;
+      inputOffset += 21;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode22(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode22(final int[] input, final int[] output) {
     final int numFrameBits = 22;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 22) | (intValue1 << 10)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 12) | (intValue2 << 20)) & mask;
@@ -1197,38 +1240,40 @@
       output[30 + outputOffset] = ((intValue20 >>> 20) | (intValue21 << 12)) & mask;
       output[31 + outputOffset] = intValue21 >>> 10;
       outputOffset += 32;
+      inputOffset += 22;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode23(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode23(final int[] input, final int[] output) {
     final int numFrameBits = 23;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 23) | (intValue1 << 9)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 14) | (intValue2 << 18)) & mask;
@@ -1262,39 +1307,41 @@
       output[30 + outputOffset] = ((intValue21 >>> 18) | (intValue22 << 14)) & mask;
       output[31 + outputOffset] = intValue22 >>> 9;
       outputOffset += 32;
+      inputOffset += 23;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode24(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode24(final int[] input, final int[] output) {
     final int numFrameBits = 24;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 24) | (intValue1 << 8)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 16) | (intValue2 << 16)) & mask;
@@ -1328,40 +1375,42 @@
       output[30 + outputOffset] = ((intValue22 >>> 16) | (intValue23 << 16)) & mask;
       output[31 + outputOffset] = intValue23 >>> 8;
       outputOffset += 32;
+      inputOffset += 24;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode25(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode25(final int[] input, final int[] output) {
     final int numFrameBits = 25;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 25) | (intValue1 << 7)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 18) | (intValue2 << 14)) & mask;
@@ -1395,41 +1444,43 @@
       output[30 + outputOffset] = ((intValue23 >>> 14) | (intValue24 << 18)) & mask;
       output[31 + outputOffset] = intValue24 >>> 7;
       outputOffset += 32;
+      inputOffset += 25;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode26(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode26(final int[] input, final int[] output) {
     final int numFrameBits = 26;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
+      final int intValue25 = input[25+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 26) | (intValue1 << 6)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 20) | (intValue2 << 12)) & mask;
@@ -1463,42 +1514,44 @@
       output[30 + outputOffset] = ((intValue24 >>> 12) | (intValue25 << 20)) & mask;
       output[31 + outputOffset] = intValue25 >>> 6;
       outputOffset += 32;
+      inputOffset += 26;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode27(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode27(final int[] input, final int[] output) {
     final int numFrameBits = 27;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
+      final int intValue25 = input[25+inputOffset];
+      final int intValue26 = input[26+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 27) | (intValue1 << 5)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 22) | (intValue2 << 10)) & mask;
@@ -1532,43 +1585,45 @@
       output[30 + outputOffset] = ((intValue25 >>> 10) | (intValue26 << 22)) & mask;
       output[31 + outputOffset] = intValue26 >>> 5;
       outputOffset += 32;
+      inputOffset += 27;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode28(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode28(final int[] input, final int[] output) {
     final int numFrameBits = 28;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
+      final int intValue25 = input[25+inputOffset];
+      final int intValue26 = input[26+inputOffset];
+      final int intValue27 = input[27+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 28) | (intValue1 << 4)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 24) | (intValue2 << 8)) & mask;
@@ -1602,44 +1657,46 @@
       output[30 + outputOffset] = ((intValue26 >>> 8) | (intValue27 << 24)) & mask;
       output[31 + outputOffset] = intValue27 >>> 4;
       outputOffset += 32;
+      inputOffset += 28;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode29(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode29(final int[] input, final int[] output) {
     final int numFrameBits = 29;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
+      final int intValue25 = input[25+inputOffset];
+      final int intValue26 = input[26+inputOffset];
+      final int intValue27 = input[27+inputOffset];
+      final int intValue28 = input[28+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 29) | (intValue1 << 3)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 26) | (intValue2 << 6)) & mask;
@@ -1673,45 +1730,47 @@
       output[30 + outputOffset] = ((intValue27 >>> 6) | (intValue28 << 26)) & mask;
       output[31 + outputOffset] = intValue28 >>> 3;
       outputOffset += 32;
+      inputOffset += 29;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode30(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode30(final int[] input, final int[] output) {
     final int numFrameBits = 30;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
-      int intValue29 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
+      final int intValue25 = input[25+inputOffset];
+      final int intValue26 = input[26+inputOffset];
+      final int intValue27 = input[27+inputOffset];
+      final int intValue28 = input[28+inputOffset];
+      final int intValue29 = input[29+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask;
@@ -1745,46 +1804,48 @@
       output[30 + outputOffset] = ((intValue28 >>> 4) | (intValue29 << 28)) & mask;
       output[31 + outputOffset] = intValue29 >>> 2;
       outputOffset += 32;
+      inputOffset += 30;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode31(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode31(final int[] input, final int[] output) {
     final int numFrameBits = 31;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
-      int intValue29 = compressedBuffer.get();
-      int intValue30 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
+      final int intValue25 = input[25+inputOffset];
+      final int intValue26 = input[26+inputOffset];
+      final int intValue27 = input[27+inputOffset];
+      final int intValue28 = input[28+inputOffset];
+      final int intValue29 = input[29+inputOffset];
+      final int intValue30 = input[30+inputOffset];
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 31) | (intValue1 << 1)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 30) | (intValue2 << 2)) & mask;
@@ -1818,47 +1879,49 @@
       output[30 + outputOffset] = ((intValue29 >>> 2) | (intValue30 << 30)) & mask;
       output[31 + outputOffset] = intValue30 >>> 1;
       outputOffset += 32;
+      inputOffset += 31;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode32(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode32(final int[] input, final int[] output) {
     final int numFrameBits = 32;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
-      int intValue29 = compressedBuffer.get();
-      int intValue30 = compressedBuffer.get();
-      int intValue31 = compressedBuffer.get();
+      final int intValue0 = input[inputOffset];
+      final int intValue1 = input[1+inputOffset];
+      final int intValue2 = input[2+inputOffset];
+      final int intValue3 = input[3+inputOffset];
+      final int intValue4 = input[4+inputOffset];
+      final int intValue5 = input[5+inputOffset];
+      final int intValue6 = input[6+inputOffset];
+      final int intValue7 = input[7+inputOffset];
+      final int intValue8 = input[8+inputOffset];
+      final int intValue9 = input[9+inputOffset];
+      final int intValue10 = input[10+inputOffset];
+      final int intValue11 = input[11+inputOffset];
+      final int intValue12 = input[12+inputOffset];
+      final int intValue13 = input[13+inputOffset];
+      final int intValue14 = input[14+inputOffset];
+      final int intValue15 = input[15+inputOffset];
+      final int intValue16 = input[16+inputOffset];
+      final int intValue17 = input[17+inputOffset];
+      final int intValue18 = input[18+inputOffset];
+      final int intValue19 = input[19+inputOffset];
+      final int intValue20 = input[20+inputOffset];
+      final int intValue21 = input[21+inputOffset];
+      final int intValue22 = input[22+inputOffset];
+      final int intValue23 = input[23+inputOffset];
+      final int intValue24 = input[24+inputOffset];
+      final int intValue25 = input[25+inputOffset];
+      final int intValue26 = input[26+inputOffset];
+      final int intValue27 = input[27+inputOffset];
+      final int intValue28 = input[28+inputOffset];
+      final int intValue29 = input[29+inputOffset];
+      final int intValue30 = input[30+inputOffset];
+      final int intValue31 = input[31+inputOffset];
       output[0 + outputOffset] = intValue0;
       output[1 + outputOffset] = intValue1;
       output[2 + outputOffset] = intValue2;
@@ -1892,6 +1955,7 @@
       output[30 + outputOffset] = intValue30;
       output[31 + outputOffset] = intValue31;
       outputOffset += 32;
+      inputOffset += 32;
     }
   }
 }
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java	(revision 1352207)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java	(working copy)
@@ -70,24 +70,72 @@
     ForIndexInput(final IndexInput in) throws IOException {
       super(in);
     }
-    class ForBlockReader implements FixedIntBlockIndexInput.BlockReader {
-      byte[] encoded;
+    final class ForBlockReader implements FixedIntBlockIndexInput.BlockReader { // reads one block from `in` and decodes it into `buffer`
+      byte[] byteEncoded; // block payload bytes, filled by in.readBytes() in readBlock()
+      int[] intEncoded; // byteEncoded repacked as ints by convert(); input to the decodeN routines
       int[] buffer;
       IndexInput in;
       IntBuffer encodedBuffer;
       ForBlockReader(final IndexInput in, final int[] buffer) {
-        this.encoded = new byte[blockSize*8+4];
+        this.byteEncoded = new byte[blockSize*4+4]; // blockSize+1 groups of 4 bytes
+        this.intEncoded = new int[blockSize+1]; // one int per 4-byte group of byteEncoded
         this.in=in;
         this.buffer=buffer;
-        this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
+        this.encodedBuffer=ByteBuffer.wrap(byteEncoded).asIntBuffer(); // IntBuffer view over byteEncoded; NOTE(review): not referenced by the added lines of this hunk - confirm it is still needed
       }
+      private void convert() { // repacks byteEncoded into intEncoded, 4 bytes per int, first byte most significant
+        int i=0,j=0,length=byteEncoded.length; // i indexes bytes, j indexes ints; the whole array is walked, not only the bytes just read
+        for (; i<length; i+=4,j++) { // length is a multiple of 4, so i+3 stays in range
+          intEncoded[j] = ((byteEncoded[i])   << 24) | // top byte: no 0xff mask needed, sign-extension bits are shifted out
+                   ((0xff & byteEncoded[i+1]) << 16) | // 0xff mask undoes sign extension of the byte
+                   ((0xff & byteEncoded[i+2]) << 8)  | // same masking for the third byte
+                    (0xff & byteEncoded[i+3]); // lowest byte
+        }
+      }
       public void seek(long pos) {}
       // TODO: implement public void skipBlock() {} ?
       public void readBlock() throws IOException {
-        final int numBytes = in.readInt();
-        assert numBytes <= blockSize*8+4;
-        in.readBytes(encoded,0,numBytes);
-        ForUtil.decompress(encodedBuffer,buffer);
+        final int header = in.readInt(); // first int of the block carries numInts and numBits
+        int numBits = ((header >> 8) & ((1<<5)-1)) +1 ; // bits 8..12 of the header, plus 1: always in 1..32
+        int numInts = ((header) & ((1<<8)-1)) +1 ; // low 8 bits of the header, plus 1
+        assert numInts==128; // the decodeN routines are hardwired to 128 values per block
+        in.readBytes(byteEncoded,0,(numBits*numInts+7)/8); // payload is numBits*numInts bits, rounded up to whole bytes
+        convert(); // decodeN takes int[] input, so repack the bytes first
+
+        switch(numBits) { // no default: the 5-bit field plus 1 can only yield 1..32, and each value has a case
+        case 1: ForDecompressImpl.decode1(intEncoded, buffer); break;
+        case 2: ForDecompressImpl.decode2(intEncoded, buffer); break;
+        case 3: ForDecompressImpl.decode3(intEncoded, buffer); break;
+        case 4: ForDecompressImpl.decode4(intEncoded, buffer); break;
+        case 5: ForDecompressImpl.decode5(intEncoded, buffer); break;
+        case 6: ForDecompressImpl.decode6(intEncoded, buffer); break;
+        case 7: ForDecompressImpl.decode7(intEncoded, buffer); break;
+        case 8: ForDecompressImpl.decode8(intEncoded, buffer); break;
+        case 9: ForDecompressImpl.decode9(intEncoded, buffer); break;
+        case 10: ForDecompressImpl.decode10(intEncoded, buffer); break;
+        case 11: ForDecompressImpl.decode11(intEncoded, buffer); break;
+        case 12: ForDecompressImpl.decode12(intEncoded, buffer); break;
+        case 13: ForDecompressImpl.decode13(intEncoded, buffer); break;
+        case 14: ForDecompressImpl.decode14(intEncoded, buffer); break;
+        case 15: ForDecompressImpl.decode15(intEncoded, buffer); break;
+        case 16: ForDecompressImpl.decode16(intEncoded, buffer); break;
+        case 17: ForDecompressImpl.decode17(intEncoded, buffer); break;
+        case 18: ForDecompressImpl.decode18(intEncoded, buffer); break;
+        case 19: ForDecompressImpl.decode19(intEncoded, buffer); break;
+        case 20: ForDecompressImpl.decode20(intEncoded, buffer); break;
+        case 21: ForDecompressImpl.decode21(intEncoded, buffer); break;
+        case 22: ForDecompressImpl.decode22(intEncoded, buffer); break;
+        case 23: ForDecompressImpl.decode23(intEncoded, buffer); break;
+        case 24: ForDecompressImpl.decode24(intEncoded, buffer); break;
+        case 25: ForDecompressImpl.decode25(intEncoded, buffer); break;
+        case 26: ForDecompressImpl.decode26(intEncoded, buffer); break;
+        case 27: ForDecompressImpl.decode27(intEncoded, buffer); break;
+        case 28: ForDecompressImpl.decode28(intEncoded, buffer); break;
+        case 29: ForDecompressImpl.decode29(intEncoded, buffer); break;
+        case 30: ForDecompressImpl.decode30(intEncoded, buffer); break;
+        case 31: ForDecompressImpl.decode31(intEncoded, buffer); break;
+        case 32: ForDecompressImpl.decode32(intEncoded, buffer); break;
+        }
       }
     }
     @Override
@@ -107,7 +155,6 @@
     @Override
     protected void flushBlock() throws IOException {
       final int numBytes = ForUtil.compress(buffer,buffer.length,encodedBuffer);
-      out.writeInt(numBytes);
       out.writeBytes(encoded, numBytes);
     }
   }
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java	(revision 1352207)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java	(working copy)
@@ -44,53 +44,6 @@
     return (HEADER_INT_SIZE+(size*numBits+31)/32)*4;
   }
   
-  public static int decompress(IntBuffer intBuffer, int[] data) {
-    intBuffer.rewind();
-    int header = intBuffer.get();
-
-    int numInts = (header & MASK[8]) + 1;
-    int numBits = ((header >> 8) & MASK[5]) + 1;
-
-    // TODO: ForDecompressImpl is hardewired to size==128 only
-    switch(numBits) {
-      case 1: ForDecompressImpl.decode1(intBuffer, data); break;
-      case 2: ForDecompressImpl.decode2(intBuffer, data); break;
-      case 3: ForDecompressImpl.decode3(intBuffer, data); break;
-      case 4: ForDecompressImpl.decode4(intBuffer, data); break;
-      case 5: ForDecompressImpl.decode5(intBuffer, data); break;
-      case 6: ForDecompressImpl.decode6(intBuffer, data); break;
-      case 7: ForDecompressImpl.decode7(intBuffer, data); break;
-      case 8: ForDecompressImpl.decode8(intBuffer, data); break;
-      case 9: ForDecompressImpl.decode9(intBuffer, data); break;
-      case 10: ForDecompressImpl.decode10(intBuffer, data); break;
-      case 11: ForDecompressImpl.decode11(intBuffer, data); break;
-      case 12: ForDecompressImpl.decode12(intBuffer, data); break;
-      case 13: ForDecompressImpl.decode13(intBuffer, data); break;
-      case 14: ForDecompressImpl.decode14(intBuffer, data); break;
-      case 15: ForDecompressImpl.decode15(intBuffer, data); break;
-      case 16: ForDecompressImpl.decode16(intBuffer, data); break;
-      case 17: ForDecompressImpl.decode17(intBuffer, data); break;
-      case 18: ForDecompressImpl.decode18(intBuffer, data); break;
-      case 19: ForDecompressImpl.decode19(intBuffer, data); break;
-      case 20: ForDecompressImpl.decode20(intBuffer, data); break;
-      case 21: ForDecompressImpl.decode21(intBuffer, data); break;
-      case 22: ForDecompressImpl.decode22(intBuffer, data); break;
-      case 23: ForDecompressImpl.decode23(intBuffer, data); break;
-      case 24: ForDecompressImpl.decode24(intBuffer, data); break;
-      case 25: ForDecompressImpl.decode25(intBuffer, data); break;
-      case 26: ForDecompressImpl.decode26(intBuffer, data); break;
-      case 27: ForDecompressImpl.decode27(intBuffer, data); break;
-      case 28: ForDecompressImpl.decode28(intBuffer, data); break;
-      case 29: ForDecompressImpl.decode29(intBuffer, data); break;
-      case 30: ForDecompressImpl.decode30(intBuffer, data); break;
-      case 31: ForDecompressImpl.decode31(intBuffer, data); break;
-      case 32: ForDecompressImpl.decode32(intBuffer, data); break;
-      default:
-        throw new IllegalStateException("Unknown numFrameBits " + numBits);
-    }
-    return numInts;
-  }
-
   static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits) {
     int header = getHeader(numInts,numBits);
     intBuffer.put(0, header);
Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py	(revision 1352207)
+++ lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py	(working copy)
@@ -20,17 +20,16 @@
 Generate source code for java classes for FOR decompression.
 """
 
-USE_SCRATCH = False
-#USE_SCRATCH = True 
-
 def bitsExpr(i, numFrameBits):
   framePos = i * numFrameBits
   intValNum = (framePos / 32)
   bitPos = framePos % 32
-  if USE_SCRATCH:
-    bitsInInt = "inputInts[" + str(intValNum) + "]"
-  else:
-    bitsInInt = "intValue" + str(intValNum)
+  bitsInInt = "intValue" + str(intValNum)
+#  if intValNum > 0:
+#    bitsInInt = "input[" + str(intValNum) + "+ inputOffset]"
+#  else:
+#    bitsInInt = "input[inputOffset]"
+
   needBrackets = 0
   if bitPos > 0:
     bitsInInt +=  " >>> " + str(bitPos)
@@ -38,10 +37,8 @@
   if bitPos + numFrameBits > 32:
     if needBrackets:
       bitsInInt = "(" + bitsInInt + ")"
-    if USE_SCRATCH:
-      bitsInInt += " | (inputInts[" + str(intValNum+1) + "] << "+ str(32 - bitPos) + ")"
-    else:
-      bitsInInt += " | (intValue" + str(intValNum+1) + " << "+ str(32 - bitPos) + ")"
+    bitsInInt += " | (intValue" + str(intValNum+1) + " << "+ str(32 - bitPos) + ")"
+    #bitsInInt += "| (input[" + str(intValNum+1) + "+ inputOffset]" + " << "+ str(32 - bitPos) + ")"
     needBrackets = 1
   if bitPos + numFrameBits != 32:
     if needBrackets:
@@ -53,7 +50,6 @@
 def genDecompress():
   className = "ForDecompressImpl"
   fileName = className + ".java"
-  imports = "import java.nio.IntBuffer;\n"
   f = open(fileName, 'w')
   w = f.write
   try:
@@ -89,26 +85,25 @@
     for numFrameBits in xrange(1, 33):
 
       w('\n  // NOTE: hardwired to blockSize == 128\n')
-      if USE_SCRATCH:
-        w('  public static void decode%d(final IntBuffer compressedBuffer, final int[] output, final int[] scratch) {\n' % numFrameBits)
-      else:
-        w('  public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
+      w('  public static void decode%d(final int[] input, final int[] output) {\n' % numFrameBits)
 
       w('    final int numFrameBits = %d;\n' % numFrameBits)
       w('    final int mask = (int) ((1L<<numFrameBits) - 1);\n')
       w('    int outputOffset = 0;\n')
+      w('    int inputOffset = 0;\n')
       
       w('    for(int step=0;step<4;step++) {\n')
 
-      if USE_SCRATCH:
-        w('      compressedBuffer.get(scratch, 0, %d);\n' % numFrameBits)
-      else:
-        for i in range(numFrameBits): # declare int vars and init from buffer
-          w("      int intValue" + str(i) + " = compressedBuffer.get();\n")
+      for i in range(numFrameBits): # declare int vars and init from buffer
+        if (i>0):
+          w("      final int intValue" + str(i) + " = input["+str(i)+"+inputOffset];\n")
+        else:
+          w("      final int intValue" + str(i) + " = input[inputOffset];\n")
 
       for i in range(32): # set output from int vars
         w("      output[" + str(i) + " + outputOffset] = " + bitsExpr(i, numFrameBits) + ";\n")
       w('      outputOffset += 32;\n')
+      w('      inputOffset += '+str(numFrameBits)+';\n')
       w('    }\n')
       w('  }\n')
     w('}\n')
