commit b35237237c4a4915d1688f9ccdcd7e83219c6e23 Author: Owen O'Malley Date: Wed Oct 29 11:43:10 2014 -0700 HIVE-8650 byte align ORC's rle v2 patched base diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java index 3c737b7..1c35459 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java @@ -149,7 +149,7 @@ private long fixedDelta; private int zzBits90p; private int zzBits100p; - private int brBits95p; + private int brBits90p; private int brBits100p; private int bitsDeltaMax; private int patchWidth; @@ -271,7 +271,7 @@ private void writePatchedBaseValues() throws IOException { // So, if we align base_value then actual_value can not be reconstructed. // write the number of fixed bits required in next 5 bits - final int fb = brBits95p; + final int fb = brBits90p; final int efb = utils.encodeBitWidth(fb) << 1; // adjust variable run length, they are one off @@ -527,9 +527,12 @@ private void determineEncoding() { baseRedLiterals[i] = literals[i] - min; } - // 95th percentile width is used to determine max allowed value + // 90th percentile width is used to determine max allowed value // after which patching will be done - brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95); + brBits90p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.9); + if (alignedBitpacking) { + brBits90p = utils.getClosestAlignedFixedBits(brBits90p); + } // 100th percentile is used to compute the max patch width brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0); @@ -540,7 +543,7 @@ private void determineEncoding() { // fallback to DIRECT encoding. // The decision to use patched base was based on zigzag values, but the // actual patching is done on base reduced literals. 
- if ((brBits100p - brBits95p) != 0) { + if ((brBits100p - brBits90p) != 0) { encoding = EncodingType.PATCHED_BASE; preparePatchedBlob(); return; @@ -571,7 +574,7 @@ private void computeZigZagLiterals() { private void preparePatchedBlob() { // mask will be max value beyond which patch will be generated - long mask = (1L << brBits95p) - 1; + long mask = (1L << brBits90p) - 1; - // since we are considering only 95 percentile, the size of gap and - // patch array can contain only be 5% values + // the mask is now derived from the 90th percentile width, so up to 10% + // of the values can need a gap/patch entry (NOTE(review): confirm patchLength) @@ -581,7 +584,7 @@ private void preparePatchedBlob() { long[] patchList = new long[patchLength]; // #bit for patch - patchWidth = brBits100p - brBits95p; + patchWidth = brBits100p - brBits90p; patchWidth = utils.getClosestFixedBits(patchWidth); // if patch bit requirement is 64 then it will not possible to pack @@ -589,8 +592,8 @@ private void preparePatchedBlob() { // packed together adjust the patch width if (patchWidth == 64) { patchWidth = 56; - brBits95p = 8; - mask = (1L << brBits95p) - 1; + brBits90p = 8; + mask = (1L << brBits90p) - 1; } int gapIdx = 0; @@ -612,7 +615,7 @@ private void preparePatchedBlob() { gapList[gapIdx++] = gap; // extract the most significant bits that are over mask bits - long patch = baseRedLiterals[i] >>> brBits95p; + long patch = baseRedLiterals[i] >>> brBits90p; patchList[patchIdx++] = patch; // strip off the MSB to enable safe bit packing @@ -680,7 +683,7 @@ private void clear() { fixedDelta = 0; zzBits90p = 0; zzBits100p = 0; - brBits95p = 0; + brBits90p = 0; brBits100p = 0; bitsDeltaMax = 0; patchGapWidth = 0;