diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java index 3a0ba1d..1ef8209 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java @@ -142,9 +142,9 @@ private final boolean signed; private EncodingType encoding; private int numLiterals; - private long[] zigzagLiterals; - private long[] baseRedLiterals; - private long[] adjDeltas; + private final long[] zigzagLiterals = new long[MAX_SCOPE]; + private final long[] baseRedLiterals = new long[MAX_SCOPE]; + private final long[] adjDeltas = new long[MAX_SCOPE]; private long fixedDelta; private int zzBits90p; private int zzBits100p; @@ -253,7 +253,7 @@ private void writeDeltaValues() throws IOException { utils.writeVslong(output, adjDeltas[0]); // adjacent delta values are bit packed - utils.writeInts(adjDeltas, 1, adjDeltas.length - 1, fb, output); + utils.writeInts(adjDeltas, 1, numLiterals - 2, fb, output); } } @@ -323,7 +323,7 @@ private void writePatchedBaseValues() throws IOException { // base reduced literals are bit packed int closestFixedBits = utils.getClosestFixedBits(fb); - utils.writeInts(baseRedLiterals, 0, baseRedLiterals.length, closestFixedBits, + utils.writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits, output); // write patch list @@ -372,7 +372,7 @@ private void writeDirectValues() throws IOException { output.write(headerSecondByte); // bit packing the zigzag encoded literals - utils.writeInts(zigzagLiterals, 0, zigzagLiterals.length, fb, output); + utils.writeInts(zigzagLiterals, 0, numLiterals, fb, output); // reset run length variableRunLength = 0; @@ -414,14 +414,6 @@ private void writeShortRepeatValues() throws IOException { } private void determineEncoding() { - // used for direct encoding - zigzagLiterals = new long[numLiterals]; - - // used for patched base encoding - baseRedLiterals = new long[numLiterals]; - - // used for delta encoding - adjDeltas = new long[numLiterals - 1]; int idx = 0; @@ -530,10 +522,10 @@ private void determineEncoding() { // is not significant then we can use direct or delta encoding double p = 0.9; - zzBits90p = utils.percentileBits(zigzagLiterals, p); + zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p); p = 1.0; - zzBits100p = utils.percentileBits(zigzagLiterals, p); + zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p); int diffBitsLH = zzBits100p - zzBits90p; @@ -543,18 +535,18 @@ private void determineEncoding() { && isFixedDelta == false) { // patching is done only on base reduced values. // remove base from literals - for(int i = 0; i < zigzagLiterals.length; i++) { + for(int i = 0; i < numLiterals; i++) { baseRedLiterals[i] = literals[i] - min; } // 95th percentile width is used to determine max allowed value // after which patching will be done p = 0.95; - brBits95p = utils.percentileBits(baseRedLiterals, p); + brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p); // 100th percentile is used to compute the max patch width p = 1.0; - brBits100p = utils.percentileBits(baseRedLiterals, p); + brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p); // after base reducing the values, if the difference in bits between // 95th percentile and 100th percentile value is zero then there @@ -592,7 +584,7 @@ private void preparePatchedBlob() { // since we are considering only 95 percentile, the size of gap and // patch array can contain only be 5% values - patchLength = (int) Math.ceil((baseRedLiterals.length * 0.05)); + patchLength = (int) Math.ceil((numLiterals * 0.05)); int[] gapList = new int[patchLength]; long[] patchList = new long[patchLength]; @@ -616,7 +608,7 @@ private void preparePatchedBlob() { int gap = 0; int maxGap = 0; - for(int i = 0; i < baseRedLiterals.length; i++) { + for(int i = 0; i < numLiterals; i++) { // if value is above mask then create the patch and record the gap if (baseRedLiterals[i] > mask) { gap = i - prev; @@ -694,9 +686,6 @@ private void clear() { numLiterals = 0; encoding = null; prevDelta = 0; - zigzagLiterals = null; - baseRedLiterals = null; - adjDeltas = null; fixedDelta = 0; zzBits90p = 0; zzBits100p = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java index 71c1c4d..b5380c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java @@ -244,7 +244,7 @@ long zigzagDecode(long val) { * @param p - percentile value (>=0.0 to <=1.0) * @return pth percentile bits */ - int percentileBits(long[] data, double p) { + int percentileBits(long[] data, int offset, int length, double p) { if ((p > 1.0) || (p <= 0.0)) { return -1; } @@ -254,13 +254,12 @@ int percentileBits(long[] data, double p) { int[] hist = new int[32]; // compute the histogram - for(long l : data) { - int idx = encodeBitWidth(findClosestNumBits(l)); + for(int i = offset; i < (offset + length); i++) { + int idx = encodeBitWidth(findClosestNumBits(data[i])); hist[idx] += 1; } - int len = data.length; - int perLen = (int) (len * (1.0 - p)); + int perLen = (int) (length * (1.0 - p)); // return the bits required by pth percentile length for(int i = hist.length - 1; i >= 0; i--) {