commit 16b95caabbae45d685eba7d7cde43512da59e7fa Author: Owen O'Malley Date: Wed Oct 29 08:25:09 2014 -0700 HIVE-8644 fix converting floats to integers. diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java index 6344a66..3c737b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.io.orc; import java.io.IOException; +import java.nio.ByteBuffer; /** * A writer that performs light weight compression over sequence of integers. @@ -829,4 +830,63 @@ public void getPosition(PositionRecorder recorder) throws IOException { output.getPosition(recorder); recorder.addPosition(numLiterals); } + + /** + * A class that prints the bytes generated by the encoder. + * Used for testing only. + */ + private static class OutputPrinter implements OutStream.OutputReceiver { + int count = 0; + + @Override + public void output(ByteBuffer buffer) throws IOException { + while (buffer.hasRemaining()) { + int val = buffer.get() & 0xff; + if (val < 16) { + System.out.print('0'); + } + System.out.print(Integer.toHexString(val)); + if (count++ > 15) { + count = 0; + System.out.println(); + } else { + System.out.print(' '); + } + } + } + } + + /** + * Given a list of longs and whether the type is signed, print the serialized + * bytes. 
+ * @param signed whether the type is signed + * @param values the values to encode + * @throws IOException + */ + private static void printConversion(boolean signed, + long[] values) throws IOException { + OutStream stream = new OutStream("test", 128000, null, new OutputPrinter()); + RunLengthIntegerWriterV2 writer = new RunLengthIntegerWriterV2(stream, + signed); + for(int i=0; i < values.length; ++i) { + System.out.println("value["+i+"]=" + values[i]); + writer.write(values[i]); + } + writer.flush(); + System.out.println(); + } + + /** + * Provide a main function to manually test different sequences of longs. + * The first parameter is 'u' for unsigned or 's' for signed; followed by + * the numbers to encode. + */ + public static void main(String[] args) throws Exception { + boolean signed = "s".equals(args[0]); + long[] values = new long[args.length - 1]; + for(int i=0; i < values.length; ++i) { + values[i] = Long.parseLong(args[i+1]); + } + printConversion(signed, values); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java index b14fa7b..3524206 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java @@ -259,7 +259,7 @@ int percentileBits(long[] data, int offset, int length, double p) { hist[idx] += 1; } - int perLen = (int) (length * (1.0 - p)); + int perLen = (int) Math.round(length * (1.0 - p)); // return the bits required by pth percentile length for(int i = hist.length - 1; i >= 0; i--) { @@ -274,7 +274,8 @@ int percentileBits(long[] data, int offset, int length, double p) { /** * Read n bytes in big endian order and convert to long - * @param b - byte array + * @param input the input stream + * @param n the number of bytes to read * @return long value */ long bytesToLongBE(InStream input, int n) throws IOException {