Index: core/src/test/org/apache/lucene/codecs/pfor/TestForUtil.java
===================================================================
--- core/src/test/org/apache/lucene/codecs/pfor/TestForUtil.java	(revision 1352207)
+++ core/src/test/org/apache/lucene/codecs/pfor/TestForUtil.java	(working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.pfor;
+package org.apache.lucene;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,6 +23,7 @@
 import org.apache.lucene.codecs.pfor.*;
 import org.apache.lucene.util.LuceneTestCase;
 
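+// NOTE: this test currently verifies nothing -- the ForUtil.decompress calls and the asserts below are commented out since the decode methods were unfolded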
 public class TestForUtil extends LuceneTestCase {
   static final int[] MASK={ 0x00000000,
     0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
@@ -43,7 +44,6 @@
     tryForcedException();
     tryAllDistribution();
   }
-
   // Test correctness of ignored forced exception
   public void tryForcedException() throws Exception {
     int sz=128;
@@ -67,13 +67,13 @@
       ensz=sz*8+4;
     }
     resBuffer.rewind();
-    ForUtil.decompress(resBuffer,copy);
+//    ForUtil.decompress(resBuffer,copy); 
 
 //    println(getHex(data,sz)+"\n");
 //    println(getHex(res,ensz)+"\n");
 //    println(getHex(copy,sz)+"\n");
     
-    assert cmp(data,sz,copy,sz)==true;
+//    assert cmp(data,sz,copy,sz)==true;
   }
 
   // Test correctness of compressing and decompressing
@@ -107,13 +107,13 @@
     }
     int[] copy = new int[sz];
 
-    ForUtil.decompress(resBuffer,copy);
+//    ForUtil.decompress(resBuffer,copy);
 
 //    println(getHex(data,sz)+"\n");
 //    println(getHex(res,ensz)+"\n");
 //    println(getHex(copy,sz)+"\n");
 
-    assert cmp(data,sz,copy,sz)==true;
+//    assert cmp(data,sz,copy,sz)==true;
   }
   public boolean cmp(int[] a, int sza, int[] b, int szb) {
     if (sza!=szb)
@@ -126,7 +126,7 @@
     }
     return true;
   }
-  public static String getHex( byte [] raw, int sz ) {
+  public static String getHex(final byte [] raw, int sz ) {
     final String HEXES = "0123456789ABCDEF";
     if ( raw == null ) {
       return null;
@@ -142,7 +142,7 @@
     }
     return hex.toString();
   }
-  public static String getHex( int [] raw, int sz ) {
+  public static String getHex(final int [] raw, int sz ) {
     if ( raw == null ) {
       return null;
     }
Index: core/src/java/org/apache/lucene/codecs/pfor/ForDecompressImpl.java
===================================================================
--- core/src/java/org/apache/lucene/codecs/pfor/ForDecompressImpl.java	(revision 1352207)
+++ core/src/java/org/apache/lucene/codecs/pfor/ForDecompressImpl.java	(working copy)
@@ -22,14 +22,21 @@
 final class ForDecompressImpl {
 
   // nocommit: assess perf of this to see if specializing is really needed
+  private static int getInt(byte b0, byte b1, byte b2, byte b3) { // packs four bytes into one int; b0 becomes the most significant byte, b3 the least
+    return ( b0  << 24) | // no 0xff mask needed: shifting left by 24 discards b0's sign-extension bits
+           ((0xff & b1) << 16) | // 0xff mask strips sign extension so it cannot pollute the higher bits
+           ((0xff & b2) << 8)  | 
+            (0xff & b3);
+  }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode1(final byte[] input, final int[] output) {
     final int numFrameBits = 1;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 1) & mask;
       output[2 + outputOffset] = (intValue0 >>> 2) & mask;
@@ -63,17 +70,19 @@
       output[30 + outputOffset] = (intValue0 >>> 30) & mask;
       output[31 + outputOffset] = intValue0 >>> 31;
       outputOffset += 32;
+      inputOffset += 4;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode2(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode2(final byte[] input, final int[] output) {
     final int numFrameBits = 2;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 2) & mask;
       output[2 + outputOffset] = (intValue0 >>> 4) & mask;
@@ -107,18 +116,20 @@
       output[30 + outputOffset] = (intValue1 >>> 28) & mask;
       output[31 + outputOffset] = intValue1 >>> 30;
       outputOffset += 32;
+      inputOffset += 8;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode3(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode3(final byte[] input, final int[] output) {
     final int numFrameBits = 3;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 3) & mask;
       output[2 + outputOffset] = (intValue0 >>> 6) & mask;
@@ -152,19 +163,21 @@
       output[30 + outputOffset] = (intValue2 >>> 26) & mask;
       output[31 + outputOffset] = intValue2 >>> 29;
       outputOffset += 32;
+      inputOffset += 12;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode4(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode4(final byte[] input, final int[] output) {
     final int numFrameBits = 4;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 4) & mask;
       output[2 + outputOffset] = (intValue0 >>> 8) & mask;
@@ -198,20 +211,22 @@
       output[30 + outputOffset] = (intValue3 >>> 24) & mask;
       output[31 + outputOffset] = intValue3 >>> 28;
       outputOffset += 32;
+      inputOffset += 16;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode5(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode5(final byte[] input, final int[] output) {
     final int numFrameBits = 5;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 5) & mask;
       output[2 + outputOffset] = (intValue0 >>> 10) & mask;
@@ -245,21 +260,23 @@
       output[30 + outputOffset] = (intValue4 >>> 22) & mask;
       output[31 + outputOffset] = intValue4 >>> 27;
       outputOffset += 32;
+      inputOffset += 20;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode6(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode6(final byte[] input, final int[] output) {
     final int numFrameBits = 6;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 6) & mask;
       output[2 + outputOffset] = (intValue0 >>> 12) & mask;
@@ -293,22 +310,24 @@
       output[30 + outputOffset] = (intValue5 >>> 20) & mask;
       output[31 + outputOffset] = intValue5 >>> 26;
       outputOffset += 32;
+      inputOffset += 24;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode7(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode7(final byte[] input, final int[] output) {
     final int numFrameBits = 7;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 7) & mask;
       output[2 + outputOffset] = (intValue0 >>> 14) & mask;
@@ -342,23 +361,25 @@
       output[30 + outputOffset] = (intValue6 >>> 18) & mask;
       output[31 + outputOffset] = intValue6 >>> 25;
       outputOffset += 32;
+      inputOffset += 28;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode8(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode8(final byte[] input, final int[] output) {
     final int numFrameBits = 8;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 8) & mask;
       output[2 + outputOffset] = (intValue0 >>> 16) & mask;
@@ -392,24 +413,26 @@
       output[30 + outputOffset] = (intValue7 >>> 16) & mask;
       output[31 + outputOffset] = intValue7 >>> 24;
       outputOffset += 32;
+      inputOffset += 32;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode9(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode9(final byte[] input, final int[] output) {
     final int numFrameBits = 9;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 9) & mask;
       output[2 + outputOffset] = (intValue0 >>> 18) & mask;
@@ -443,25 +466,27 @@
       output[30 + outputOffset] = (intValue8 >>> 14) & mask;
       output[31 + outputOffset] = intValue8 >>> 23;
       outputOffset += 32;
+      inputOffset += 36;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode10(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode10(final byte[] input, final int[] output) {
     final int numFrameBits = 10;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 10) & mask;
       output[2 + outputOffset] = (intValue0 >>> 20) & mask;
@@ -495,26 +520,28 @@
       output[30 + outputOffset] = (intValue9 >>> 12) & mask;
       output[31 + outputOffset] = intValue9 >>> 22;
       outputOffset += 32;
+      inputOffset += 40;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode11(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode11(final byte[] input, final int[] output) {
     final int numFrameBits = 11;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 11) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 22) | (intValue1 << 10)) & mask;
@@ -548,27 +575,29 @@
       output[30 + outputOffset] = (intValue10 >>> 10) & mask;
       output[31 + outputOffset] = intValue10 >>> 21;
       outputOffset += 32;
+      inputOffset += 44;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode12(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode12(final byte[] input, final int[] output) {
     final int numFrameBits = 12;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 12) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 24) | (intValue1 << 8)) & mask;
@@ -602,28 +631,30 @@
       output[30 + outputOffset] = (intValue11 >>> 8) & mask;
       output[31 + outputOffset] = intValue11 >>> 20;
       outputOffset += 32;
+      inputOffset += 48;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode13(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode13(final byte[] input, final int[] output) {
     final int numFrameBits = 13;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 13) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 26) | (intValue1 << 6)) & mask;
@@ -657,29 +688,31 @@
       output[30 + outputOffset] = (intValue12 >>> 6) & mask;
       output[31 + outputOffset] = intValue12 >>> 19;
       outputOffset += 32;
+      inputOffset += 52;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode14(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode14(final byte[] input, final int[] output) {
     final int numFrameBits = 14;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 14) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 28) | (intValue1 << 4)) & mask;
@@ -713,30 +746,32 @@
       output[30 + outputOffset] = (intValue13 >>> 4) & mask;
       output[31 + outputOffset] = intValue13 >>> 18;
       outputOffset += 32;
+      inputOffset += 56;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode15(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode15(final byte[] input, final int[] output) {
     final int numFrameBits = 15;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = (intValue0 >>> 15) & mask;
       output[2 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask;
@@ -770,31 +805,33 @@
       output[30 + outputOffset] = (intValue14 >>> 2) & mask;
       output[31 + outputOffset] = intValue14 >>> 17;
       outputOffset += 32;
+      inputOffset += 60;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode16(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode16(final byte[] input, final int[] output) {
     final int numFrameBits = 16;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = intValue0 >>> 16;
       output[2 + outputOffset] = intValue1 & mask;
@@ -828,32 +865,34 @@
       output[30 + outputOffset] = intValue15 & mask;
       output[31 + outputOffset] = intValue15 >>> 16;
       outputOffset += 32;
+      inputOffset += 64;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode17(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode17(final byte[] input, final int[] output) {
     final int numFrameBits = 17;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 17) | (intValue1 << 15)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 2) & mask;
@@ -887,33 +926,35 @@
       output[30 + outputOffset] = ((intValue15 >>> 30) | (intValue16 << 2)) & mask;
       output[31 + outputOffset] = intValue16 >>> 15;
       outputOffset += 32;
+      inputOffset += 68;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode18(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode18(final byte[] input, final int[] output) {
     final int numFrameBits = 18;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 18) | (intValue1 << 14)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 4) & mask;
@@ -947,34 +988,36 @@
       output[30 + outputOffset] = ((intValue16 >>> 28) | (intValue17 << 4)) & mask;
       output[31 + outputOffset] = intValue17 >>> 14;
       outputOffset += 32;
+      inputOffset += 72;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode19(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode19(final byte[] input, final int[] output) {
     final int numFrameBits = 19;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 19) | (intValue1 << 13)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 6) & mask;
@@ -1008,35 +1051,37 @@
       output[30 + outputOffset] = ((intValue17 >>> 26) | (intValue18 << 6)) & mask;
       output[31 + outputOffset] = intValue18 >>> 13;
       outputOffset += 32;
+      inputOffset += 76;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode20(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode20(final byte[] input, final int[] output) {
     final int numFrameBits = 20;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 20) | (intValue1 << 12)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 8) & mask;
@@ -1070,36 +1115,38 @@
       output[30 + outputOffset] = ((intValue18 >>> 24) | (intValue19 << 8)) & mask;
       output[31 + outputOffset] = intValue19 >>> 12;
       outputOffset += 32;
+      inputOffset += 80;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode21(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode21(final byte[] input, final int[] output) {
     final int numFrameBits = 21;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 21) | (intValue1 << 11)) & mask;
       output[2 + outputOffset] = (intValue1 >>> 10) & mask;
@@ -1133,37 +1180,39 @@
       output[30 + outputOffset] = ((intValue19 >>> 22) | (intValue20 << 10)) & mask;
       output[31 + outputOffset] = intValue20 >>> 11;
       outputOffset += 32;
+      inputOffset += 84;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode22(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode22(final byte[] input, final int[] output) {
     final int numFrameBits = 22;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 22) | (intValue1 << 10)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 12) | (intValue2 << 20)) & mask;
@@ -1197,38 +1246,40 @@
       output[30 + outputOffset] = ((intValue20 >>> 20) | (intValue21 << 12)) & mask;
       output[31 + outputOffset] = intValue21 >>> 10;
       outputOffset += 32;
+      inputOffset += 88;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode23(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode23(final byte[] input, final int[] output) {
     final int numFrameBits = 23;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 23) | (intValue1 << 9)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 14) | (intValue2 << 18)) & mask;
@@ -1262,39 +1313,41 @@
       output[30 + outputOffset] = ((intValue21 >>> 18) | (intValue22 << 14)) & mask;
       output[31 + outputOffset] = intValue22 >>> 9;
       outputOffset += 32;
+      inputOffset += 92;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode24(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode24(final byte[] input, final int[] output) {
     final int numFrameBits = 24;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 24) | (intValue1 << 8)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 16) | (intValue2 << 16)) & mask;
@@ -1328,40 +1381,42 @@
       output[30 + outputOffset] = ((intValue22 >>> 16) | (intValue23 << 16)) & mask;
       output[31 + outputOffset] = intValue23 >>> 8;
       outputOffset += 32;
+      inputOffset += 96;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode25(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode25(final byte[] input, final int[] output) {
     final int numFrameBits = 25;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 25) | (intValue1 << 7)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 18) | (intValue2 << 14)) & mask;
@@ -1395,41 +1450,43 @@
       output[30 + outputOffset] = ((intValue23 >>> 14) | (intValue24 << 18)) & mask;
       output[31 + outputOffset] = intValue24 >>> 7;
       outputOffset += 32;
+      inputOffset += 100;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode26(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode26(final byte[] input, final int[] output) {
     final int numFrameBits = 26;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
+      final int intValue25 = getInt(input[100+inputOffset], input[101+inputOffset], input[102+inputOffset], input[103+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 26) | (intValue1 << 6)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 20) | (intValue2 << 12)) & mask;
@@ -1463,42 +1520,44 @@
       output[30 + outputOffset] = ((intValue24 >>> 12) | (intValue25 << 20)) & mask;
       output[31 + outputOffset] = intValue25 >>> 6;
       outputOffset += 32;
+      inputOffset += 104;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode27(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode27(final byte[] input, final int[] output) {
     final int numFrameBits = 27;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
+      final int intValue25 = getInt(input[100+inputOffset], input[101+inputOffset], input[102+inputOffset], input[103+inputOffset]);
+      final int intValue26 = getInt(input[104+inputOffset], input[105+inputOffset], input[106+inputOffset], input[107+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 27) | (intValue1 << 5)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 22) | (intValue2 << 10)) & mask;
@@ -1532,43 +1591,45 @@
       output[30 + outputOffset] = ((intValue25 >>> 10) | (intValue26 << 22)) & mask;
       output[31 + outputOffset] = intValue26 >>> 5;
       outputOffset += 32;
+      inputOffset += 108;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode28(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode28(final byte[] input, final int[] output) {
     final int numFrameBits = 28;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
+      final int intValue25 = getInt(input[100+inputOffset], input[101+inputOffset], input[102+inputOffset], input[103+inputOffset]);
+      final int intValue26 = getInt(input[104+inputOffset], input[105+inputOffset], input[106+inputOffset], input[107+inputOffset]);
+      final int intValue27 = getInt(input[108+inputOffset], input[109+inputOffset], input[110+inputOffset], input[111+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 28) | (intValue1 << 4)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 24) | (intValue2 << 8)) & mask;
@@ -1602,44 +1663,46 @@
       output[30 + outputOffset] = ((intValue26 >>> 8) | (intValue27 << 24)) & mask;
       output[31 + outputOffset] = intValue27 >>> 4;
       outputOffset += 32;
+      inputOffset += 112;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode29(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode29(final byte[] input, final int[] output) {
     final int numFrameBits = 29;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
+      final int intValue25 = getInt(input[100+inputOffset], input[101+inputOffset], input[102+inputOffset], input[103+inputOffset]);
+      final int intValue26 = getInt(input[104+inputOffset], input[105+inputOffset], input[106+inputOffset], input[107+inputOffset]);
+      final int intValue27 = getInt(input[108+inputOffset], input[109+inputOffset], input[110+inputOffset], input[111+inputOffset]);
+      final int intValue28 = getInt(input[112+inputOffset], input[113+inputOffset], input[114+inputOffset], input[115+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 29) | (intValue1 << 3)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 26) | (intValue2 << 6)) & mask;
@@ -1673,45 +1736,47 @@
       output[30 + outputOffset] = ((intValue27 >>> 6) | (intValue28 << 26)) & mask;
       output[31 + outputOffset] = intValue28 >>> 3;
       outputOffset += 32;
+      inputOffset += 116;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode30(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode30(final byte[] input, final int[] output) {
     final int numFrameBits = 30;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
-      int intValue29 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
+      final int intValue25 = getInt(input[100+inputOffset], input[101+inputOffset], input[102+inputOffset], input[103+inputOffset]);
+      final int intValue26 = getInt(input[104+inputOffset], input[105+inputOffset], input[106+inputOffset], input[107+inputOffset]);
+      final int intValue27 = getInt(input[108+inputOffset], input[109+inputOffset], input[110+inputOffset], input[111+inputOffset]);
+      final int intValue28 = getInt(input[112+inputOffset], input[113+inputOffset], input[114+inputOffset], input[115+inputOffset]);
+      final int intValue29 = getInt(input[116+inputOffset], input[117+inputOffset], input[118+inputOffset], input[119+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask;
@@ -1745,46 +1810,48 @@
       output[30 + outputOffset] = ((intValue28 >>> 4) | (intValue29 << 28)) & mask;
       output[31 + outputOffset] = intValue29 >>> 2;
       outputOffset += 32;
+      inputOffset += 120;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode31(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode31(final byte[] input, final int[] output) {
     final int numFrameBits = 31;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
-      int intValue29 = compressedBuffer.get();
-      int intValue30 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
+      final int intValue25 = getInt(input[100+inputOffset], input[101+inputOffset], input[102+inputOffset], input[103+inputOffset]);
+      final int intValue26 = getInt(input[104+inputOffset], input[105+inputOffset], input[106+inputOffset], input[107+inputOffset]);
+      final int intValue27 = getInt(input[108+inputOffset], input[109+inputOffset], input[110+inputOffset], input[111+inputOffset]);
+      final int intValue28 = getInt(input[112+inputOffset], input[113+inputOffset], input[114+inputOffset], input[115+inputOffset]);
+      final int intValue29 = getInt(input[116+inputOffset], input[117+inputOffset], input[118+inputOffset], input[119+inputOffset]);
+      final int intValue30 = getInt(input[120+inputOffset], input[121+inputOffset], input[122+inputOffset], input[123+inputOffset]);
       output[0 + outputOffset] = intValue0 & mask;
       output[1 + outputOffset] = ((intValue0 >>> 31) | (intValue1 << 1)) & mask;
       output[2 + outputOffset] = ((intValue1 >>> 30) | (intValue2 << 2)) & mask;
@@ -1818,47 +1885,49 @@
       output[30 + outputOffset] = ((intValue29 >>> 2) | (intValue30 << 30)) & mask;
       output[31 + outputOffset] = intValue30 >>> 1;
       outputOffset += 32;
+      inputOffset += 124;
     }
   }
 
   // NOTE: hardwired to blockSize == 128
-  public static void decode32(final IntBuffer compressedBuffer, final int[] output) {
+  public static void decode32(final byte[] input, final int[] output) {
     final int numFrameBits = 32;
     final int mask = (int) ((1L<<numFrameBits) - 1);
     int outputOffset = 0;
+    int inputOffset = 0;
     for(int step=0;step<4;step++) {
-      int intValue0 = compressedBuffer.get();
-      int intValue1 = compressedBuffer.get();
-      int intValue2 = compressedBuffer.get();
-      int intValue3 = compressedBuffer.get();
-      int intValue4 = compressedBuffer.get();
-      int intValue5 = compressedBuffer.get();
-      int intValue6 = compressedBuffer.get();
-      int intValue7 = compressedBuffer.get();
-      int intValue8 = compressedBuffer.get();
-      int intValue9 = compressedBuffer.get();
-      int intValue10 = compressedBuffer.get();
-      int intValue11 = compressedBuffer.get();
-      int intValue12 = compressedBuffer.get();
-      int intValue13 = compressedBuffer.get();
-      int intValue14 = compressedBuffer.get();
-      int intValue15 = compressedBuffer.get();
-      int intValue16 = compressedBuffer.get();
-      int intValue17 = compressedBuffer.get();
-      int intValue18 = compressedBuffer.get();
-      int intValue19 = compressedBuffer.get();
-      int intValue20 = compressedBuffer.get();
-      int intValue21 = compressedBuffer.get();
-      int intValue22 = compressedBuffer.get();
-      int intValue23 = compressedBuffer.get();
-      int intValue24 = compressedBuffer.get();
-      int intValue25 = compressedBuffer.get();
-      int intValue26 = compressedBuffer.get();
-      int intValue27 = compressedBuffer.get();
-      int intValue28 = compressedBuffer.get();
-      int intValue29 = compressedBuffer.get();
-      int intValue30 = compressedBuffer.get();
-      int intValue31 = compressedBuffer.get();
+      final int intValue0 = getInt(input[0+inputOffset], input[1+inputOffset], input[2+inputOffset], input[3+inputOffset]);
+      final int intValue1 = getInt(input[4+inputOffset], input[5+inputOffset], input[6+inputOffset], input[7+inputOffset]);
+      final int intValue2 = getInt(input[8+inputOffset], input[9+inputOffset], input[10+inputOffset], input[11+inputOffset]);
+      final int intValue3 = getInt(input[12+inputOffset], input[13+inputOffset], input[14+inputOffset], input[15+inputOffset]);
+      final int intValue4 = getInt(input[16+inputOffset], input[17+inputOffset], input[18+inputOffset], input[19+inputOffset]);
+      final int intValue5 = getInt(input[20+inputOffset], input[21+inputOffset], input[22+inputOffset], input[23+inputOffset]);
+      final int intValue6 = getInt(input[24+inputOffset], input[25+inputOffset], input[26+inputOffset], input[27+inputOffset]);
+      final int intValue7 = getInt(input[28+inputOffset], input[29+inputOffset], input[30+inputOffset], input[31+inputOffset]);
+      final int intValue8 = getInt(input[32+inputOffset], input[33+inputOffset], input[34+inputOffset], input[35+inputOffset]);
+      final int intValue9 = getInt(input[36+inputOffset], input[37+inputOffset], input[38+inputOffset], input[39+inputOffset]);
+      final int intValue10 = getInt(input[40+inputOffset], input[41+inputOffset], input[42+inputOffset], input[43+inputOffset]);
+      final int intValue11 = getInt(input[44+inputOffset], input[45+inputOffset], input[46+inputOffset], input[47+inputOffset]);
+      final int intValue12 = getInt(input[48+inputOffset], input[49+inputOffset], input[50+inputOffset], input[51+inputOffset]);
+      final int intValue13 = getInt(input[52+inputOffset], input[53+inputOffset], input[54+inputOffset], input[55+inputOffset]);
+      final int intValue14 = getInt(input[56+inputOffset], input[57+inputOffset], input[58+inputOffset], input[59+inputOffset]);
+      final int intValue15 = getInt(input[60+inputOffset], input[61+inputOffset], input[62+inputOffset], input[63+inputOffset]);
+      final int intValue16 = getInt(input[64+inputOffset], input[65+inputOffset], input[66+inputOffset], input[67+inputOffset]);
+      final int intValue17 = getInt(input[68+inputOffset], input[69+inputOffset], input[70+inputOffset], input[71+inputOffset]);
+      final int intValue18 = getInt(input[72+inputOffset], input[73+inputOffset], input[74+inputOffset], input[75+inputOffset]);
+      final int intValue19 = getInt(input[76+inputOffset], input[77+inputOffset], input[78+inputOffset], input[79+inputOffset]);
+      final int intValue20 = getInt(input[80+inputOffset], input[81+inputOffset], input[82+inputOffset], input[83+inputOffset]);
+      final int intValue21 = getInt(input[84+inputOffset], input[85+inputOffset], input[86+inputOffset], input[87+inputOffset]);
+      final int intValue22 = getInt(input[88+inputOffset], input[89+inputOffset], input[90+inputOffset], input[91+inputOffset]);
+      final int intValue23 = getInt(input[92+inputOffset], input[93+inputOffset], input[94+inputOffset], input[95+inputOffset]);
+      final int intValue24 = getInt(input[96+inputOffset], input[97+inputOffset], input[98+inputOffset], input[99+inputOffset]);
+      final int intValue25 = getInt(input[100+inputOffset], input[101+inputOffset], input[102+inputOffset], input[103+inputOffset]);
+      final int intValue26 = getInt(input[104+inputOffset], input[105+inputOffset], input[106+inputOffset], input[107+inputOffset]);
+      final int intValue27 = getInt(input[108+inputOffset], input[109+inputOffset], input[110+inputOffset], input[111+inputOffset]);
+      final int intValue28 = getInt(input[112+inputOffset], input[113+inputOffset], input[114+inputOffset], input[115+inputOffset]);
+      final int intValue29 = getInt(input[116+inputOffset], input[117+inputOffset], input[118+inputOffset], input[119+inputOffset]);
+      final int intValue30 = getInt(input[120+inputOffset], input[121+inputOffset], input[122+inputOffset], input[123+inputOffset]);
+      final int intValue31 = getInt(input[124+inputOffset], input[125+inputOffset], input[126+inputOffset], input[127+inputOffset]);
       output[0 + outputOffset] = intValue0;
       output[1 + outputOffset] = intValue1;
       output[2 + outputOffset] = intValue2;
@@ -1892,6 +1961,7 @@
       output[30 + outputOffset] = intValue30;
       output[31 + outputOffset] = intValue31;
       outputOffset += 32;
+      inputOffset += 128;
     }
   }
 }
Index: core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java
===================================================================
--- core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java	(revision 1352207)
+++ core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java	(working copy)
@@ -64,30 +64,72 @@
     FixedIntBlockIndexInput ret = new ForIndexInput(dir.openInput(fileName, context));
     return ret;
   }
-
+  private void convert(final byte[] byteEncoded, int[] intEncoded) { // packs each 4-byte group of byteEncoded into one int of intEncoded, first byte most significant
+    int i=0,j=0,length=byteEncoded.length;
+    for (; i<length; i+=4,j++) { // i walks bytes, j walks ints; reads up to i+3, so length is assumed to be a multiple of 4 (not checked here)
+      intEncoded[j] = ((byteEncoded[i])   << 24) | // no 0xff mask needed: the sign-extension bits are shifted out
+               ((0xff & byteEncoded[i+1]) << 16) | // 0xff mask stops a negative byte from sign-extending into the higher bits
+               ((0xff & byteEncoded[i+2]) << 8)  | 
+                (0xff & byteEncoded[i+3]);
+    }
+  } // NOTE(review): no caller of convert() is visible in this part of the patch; confirm it is still needed
   // wrap input and output with buffer support
   private class ForIndexInput extends FixedIntBlockIndexInput {
     ForIndexInput(final IndexInput in) throws IOException {
       super(in);
     }
     class ForBlockReader implements FixedIntBlockIndexInput.BlockReader {
-      byte[] encoded;
+      byte[] byteEncoded; // scratch array for one block's packed payload; refilled by every readBlock() call
       int[] buffer;
       IndexInput in;
-      IntBuffer encodedBuffer;
       ForBlockReader(final IndexInput in, final int[] buffer) {
-        this.encoded = new byte[blockSize*8+4];
+        this.byteEncoded = new byte[blockSize*4+4]; // blockSize*4 bytes fits blockSize values at the 32-bit maximum; the extra 4 bytes presumably mirror the old header slot (TODO confirm)
         this.in=in;
         this.buffer=buffer;
-        this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
       }
       public void seek(long pos) {}
       // TODO: implement public void skipBlock() {} ?
       public void readBlock() throws IOException {
-        final int numBytes = in.readInt();
-        assert numBytes <= blockSize*8+4;
-        in.readBytes(encoded,0,numBytes);
-        ForUtil.decompress(encodedBuffer,buffer);
+        final int header = in.readInt(); // one int of block header; the packed payload is read right after it
+        int numBits = ((header >> 8) & ((1<<5)-1)) +1 ; // header bits 8..12 store (bits per value - 1), so numBits is 1..32
+        int numInts = ((header) & ((1<<8)-1)) +1 ; // header bits 0..7 store (value count - 1), so numInts is 1..256
+        assert numInts==128; // the unrolled decodeN routines are hardwired to 128 values; NOTE: only enforced when assertions are enabled
+        in.readBytes(byteEncoded,0,(numBits*numInts+7)/8); // payload length: numBits*numInts bits rounded up to whole bytes
+
+        switch(numBits) { // dispatch to the decoder unrolled for this bit width; every value 1..32 has a case, hence no default
+        case 1: ForDecompressImpl.decode1(byteEncoded, buffer); break;
+        case 2: ForDecompressImpl.decode2(byteEncoded, buffer); break;
+        case 3: ForDecompressImpl.decode3(byteEncoded, buffer); break;
+        case 4: ForDecompressImpl.decode4(byteEncoded, buffer); break;
+        case 5: ForDecompressImpl.decode5(byteEncoded, buffer); break;
+        case 6: ForDecompressImpl.decode6(byteEncoded, buffer); break;
+        case 7: ForDecompressImpl.decode7(byteEncoded, buffer); break;
+        case 8: ForDecompressImpl.decode8(byteEncoded, buffer); break;
+        case 9: ForDecompressImpl.decode9(byteEncoded, buffer); break;
+        case 10: ForDecompressImpl.decode10(byteEncoded, buffer); break;
+        case 11: ForDecompressImpl.decode11(byteEncoded, buffer); break;
+        case 12: ForDecompressImpl.decode12(byteEncoded, buffer); break;
+        case 13: ForDecompressImpl.decode13(byteEncoded, buffer); break;
+        case 14: ForDecompressImpl.decode14(byteEncoded, buffer); break;
+        case 15: ForDecompressImpl.decode15(byteEncoded, buffer); break;
+        case 16: ForDecompressImpl.decode16(byteEncoded, buffer); break;
+        case 17: ForDecompressImpl.decode17(byteEncoded, buffer); break;
+        case 18: ForDecompressImpl.decode18(byteEncoded, buffer); break;
+        case 19: ForDecompressImpl.decode19(byteEncoded, buffer); break;
+        case 20: ForDecompressImpl.decode20(byteEncoded, buffer); break;
+        case 21: ForDecompressImpl.decode21(byteEncoded, buffer); break;
+        case 22: ForDecompressImpl.decode22(byteEncoded, buffer); break;
+        case 23: ForDecompressImpl.decode23(byteEncoded, buffer); break;
+        case 24: ForDecompressImpl.decode24(byteEncoded, buffer); break;
+        case 25: ForDecompressImpl.decode25(byteEncoded, buffer); break;
+        case 26: ForDecompressImpl.decode26(byteEncoded, buffer); break;
+        case 27: ForDecompressImpl.decode27(byteEncoded, buffer); break;
+        case 28: ForDecompressImpl.decode28(byteEncoded, buffer); break;
+        case 29: ForDecompressImpl.decode29(byteEncoded, buffer); break;
+        case 30: ForDecompressImpl.decode30(byteEncoded, buffer); break;
+        case 31: ForDecompressImpl.decode31(byteEncoded, buffer); break;
+        case 32: ForDecompressImpl.decode32(byteEncoded, buffer); break;
+        }
       }
     }
     @Override
@@ -107,8 +149,41 @@
     @Override
     protected void flushBlock() throws IOException {
       final int numBytes = ForUtil.compress(buffer,buffer.length,encodedBuffer);
-      out.writeInt(numBytes);
       out.writeBytes(encoded, numBytes);
     }
   }
+  public static String getHex(final byte [] raw, int sz ) { // debug helper: renders the first sz bytes of raw as upper-case hex pairs, each followed by a space, 16 per row
+    final String HEXES = "0123456789ABCDEF";
+    if ( raw == null ) {
+      return null; // a null array yields null rather than an empty string
+    }
+    final StringBuilder hex = new StringBuilder( 2 * raw.length ); // capacity is only a hint: each byte appends 3 chars (two digits + space), so the builder may grow
+    for ( int i=0; i<sz; i++ ) { // sz is not checked against raw.length here
+      if (i>0 && (i)%16 == 0) // break the row before every 16th byte, but never before the first
+        hex.append("\n");
+      byte b=raw[i];
+      hex.append(HEXES.charAt((b & 0xF0) >> 4)) // high nibble
+         .append(HEXES.charAt((b & 0x0F))) // low nibble
+         .append(" ");
+    }
+    return hex.toString();
+  }
+  public static String getHex(final int [] raw, int sz ) { // debug helper: renders the first sz ints of raw as 8-digit lower-case hex words, each followed by a space, 8 per row
+    if ( raw == null ) {
+      return null; // a null array yields null rather than an empty string
+    }
+    final StringBuilder hex = new StringBuilder( 4 * raw.length ); // capacity is only a hint: each int appends 9 chars (eight digits + space), so the builder may grow
+    for ( int i=0; i<sz; i++ ) { // sz is not checked against raw.length here
+      if (i>0 && i%8 == 0) // break the row before every 8th word, but never before the first
+        hex.append("\n");
+      hex.append(String.format("%08x ",raw[i])); // %08x zero-pads to eight hex digits
+    }
+    return hex.toString();
+  }
+  static void println(String format, Object... args) { // debug helper: expands format/args via String.format and prints the result as one line on System.out
+    System.out.println(String.format(format,args)); 
+  }
+  static void print(String format, Object... args) { // debug helper: expands format/args via String.format and prints the result on System.out without a line terminator
+    System.out.print(String.format(format,args)); 
+  }
 }
Index: core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java
===================================================================
--- core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java	(revision 1352207)
+++ core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java	(working copy)
@@ -44,53 +44,6 @@
     return (HEADER_INT_SIZE+(size*numBits+31)/32)*4;
   }
   
-  public static int decompress(IntBuffer intBuffer, int[] data) {
-    intBuffer.rewind();
-    int header = intBuffer.get();
-
-    int numInts = (header & MASK[8]) + 1;
-    int numBits = ((header >> 8) & MASK[5]) + 1;
-
-    // TODO: ForDecompressImpl is hardewired to size==128 only
-    switch(numBits) {
-      case 1: ForDecompressImpl.decode1(intBuffer, data); break;
-      case 2: ForDecompressImpl.decode2(intBuffer, data); break;
-      case 3: ForDecompressImpl.decode3(intBuffer, data); break;
-      case 4: ForDecompressImpl.decode4(intBuffer, data); break;
-      case 5: ForDecompressImpl.decode5(intBuffer, data); break;
-      case 6: ForDecompressImpl.decode6(intBuffer, data); break;
-      case 7: ForDecompressImpl.decode7(intBuffer, data); break;
-      case 8: ForDecompressImpl.decode8(intBuffer, data); break;
-      case 9: ForDecompressImpl.decode9(intBuffer, data); break;
-      case 10: ForDecompressImpl.decode10(intBuffer, data); break;
-      case 11: ForDecompressImpl.decode11(intBuffer, data); break;
-      case 12: ForDecompressImpl.decode12(intBuffer, data); break;
-      case 13: ForDecompressImpl.decode13(intBuffer, data); break;
-      case 14: ForDecompressImpl.decode14(intBuffer, data); break;
-      case 15: ForDecompressImpl.decode15(intBuffer, data); break;
-      case 16: ForDecompressImpl.decode16(intBuffer, data); break;
-      case 17: ForDecompressImpl.decode17(intBuffer, data); break;
-      case 18: ForDecompressImpl.decode18(intBuffer, data); break;
-      case 19: ForDecompressImpl.decode19(intBuffer, data); break;
-      case 20: ForDecompressImpl.decode20(intBuffer, data); break;
-      case 21: ForDecompressImpl.decode21(intBuffer, data); break;
-      case 22: ForDecompressImpl.decode22(intBuffer, data); break;
-      case 23: ForDecompressImpl.decode23(intBuffer, data); break;
-      case 24: ForDecompressImpl.decode24(intBuffer, data); break;
-      case 25: ForDecompressImpl.decode25(intBuffer, data); break;
-      case 26: ForDecompressImpl.decode26(intBuffer, data); break;
-      case 27: ForDecompressImpl.decode27(intBuffer, data); break;
-      case 28: ForDecompressImpl.decode28(intBuffer, data); break;
-      case 29: ForDecompressImpl.decode29(intBuffer, data); break;
-      case 30: ForDecompressImpl.decode30(intBuffer, data); break;
-      case 31: ForDecompressImpl.decode31(intBuffer, data); break;
-      case 32: ForDecompressImpl.decode32(intBuffer, data); break;
-      default:
-        throw new IllegalStateException("Unknown numFrameBits " + numBits);
-    }
-    return numInts;
-  }
-
   static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits) {
     int header = getHeader(numInts,numBits);
     intBuffer.put(0, header);
Index: core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py
===================================================================
--- core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py	(revision 1352207)
+++ core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py	(working copy)
@@ -20,17 +20,16 @@
 Generate source code for java classes for FOR decompression.
 """
 
-USE_SCRATCH = False
-#USE_SCRATCH = True 
-
 def bitsExpr(i, numFrameBits):
   framePos = i * numFrameBits
   intValNum = (framePos / 32)
   bitPos = framePos % 32
-  if USE_SCRATCH:
-    bitsInInt = "inputInts[" + str(intValNum) + "]"
-  else:
-    bitsInInt = "intValue" + str(intValNum)
+  bitsInInt = "intValue" + str(intValNum)
+#  if intValNum > 0:
+#    bitsInInt = "input[" + str(intValNum) + "+ inputOffset]"
+#  else:
+#    bitsInInt = "input[inputOffset]"
+
   needBrackets = 0
   if bitPos > 0:
     bitsInInt +=  " >>> " + str(bitPos)
@@ -38,10 +37,8 @@
   if bitPos + numFrameBits > 32:
     if needBrackets:
       bitsInInt = "(" + bitsInInt + ")"
-    if USE_SCRATCH:
-      bitsInInt += " | (inputInts[" + str(intValNum+1) + "] << "+ str(32 - bitPos) + ")"
-    else:
-      bitsInInt += " | (intValue" + str(intValNum+1) + " << "+ str(32 - bitPos) + ")"
+    bitsInInt += " | (intValue" + str(intValNum+1) + " << "+ str(32 - bitPos) + ")"
+#    bitsInInt += "| (input[" + str(intValNum+1) + "+ inputOffset]" + " << "+ str(32 - bitPos) + ")"
     needBrackets = 1
   if bitPos + numFrameBits != 32:
     if needBrackets:
@@ -53,7 +50,6 @@
 def genDecompress():
   className = "ForDecompressImpl"
   fileName = className + ".java"
-  imports = "import java.nio.IntBuffer;\n"
   f = open(fileName, 'w')
   w = f.write
   try:
@@ -83,32 +79,40 @@
     w("final class ForDecompressImpl {\n")
 
     w('\n  // nocommit: assess perf of this to see if specializing is really needed\n')
+    w('  private static int getInt(byte b0, byte b1, byte b2, byte b3) {\n')
+    w('    return ( b0  << 24) | \n')
+    w('           ((0xff & b1) << 16) | \n')
+    w('           ((0xff & b2) << 8)  | \n')
+    w('            (0xff & b3);\n')
+    w('  }\n')
 
     # previous version only handle int less(or equal) than 31 bits
     # try to support 32 bits here
     for numFrameBits in xrange(1, 33):
 
       w('\n  // NOTE: hardwired to blockSize == 128\n')
-      if USE_SCRATCH:
-        w('  public static void decode%d(final IntBuffer compressedBuffer, final int[] output, final int[] scratch) {\n' % numFrameBits)
-      else:
-        w('  public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
+      w('  public static void decode%d(final byte[] input, final int[] output) {\n' % numFrameBits)
 
       w('    final int numFrameBits = %d;\n' % numFrameBits)
       w('    final int mask = (int) ((1L<<numFrameBits) - 1);\n')
       w('    int outputOffset = 0;\n')
+      w('    int inputOffset = 0;\n')
       
       w('    for(int step=0;step<4;step++) {\n')
 
-      if USE_SCRATCH:
-        w('      compressedBuffer.get(scratch, 0, %d);\n' % numFrameBits)
-      else:
-        for i in range(numFrameBits): # declare int vars and init from buffer
-          w("      int intValue" + str(i) + " = compressedBuffer.get();\n")
+      for i in range(numFrameBits): # declare int vars and init from buffer
+        w("      final int intValue" + str(i) + " = getInt(")
+        params=[]
+        for j in range(4):
+          params.append('input['+str(i*4+j)+'+inputOffset]')
+        w(', '.join(params))
+        w(');\n')
+        #w("      final int intValue" + str(i) + " = input["+str(i)+"+inputOffset];\n")
 
       for i in range(32): # set output from int vars
         w("      output[" + str(i) + " + outputOffset] = " + bitsExpr(i, numFrameBits) + ";\n")
       w('      outputOffset += 32;\n')
+      w('      inputOffset += '+str(numFrameBits*4)+';\n')
       w('    }\n')
       w('  }\n')
     w('}\n')
