diff --git common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
index 3939511..40629bc 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
@@ -16,6 +16,7 @@
 package org.apache.hadoop.hive.common.type;
 
 import java.math.BigDecimal;
+import java.math.BigInteger;
 import java.nio.IntBuffer;
 
 /**
@@ -550,6 +551,24 @@ public void update32(int[] array, int offset) {
   }
 
   /**
+   * Updates the value of this object with the given {@link BigInteger} and scale.
+   *
+   * @param bigInt
+   *          {@link BigInteger}
+   * @param scale
+   *          scale of the decimal
+   */
+  public void updateBigInteger(BigInteger bigInt, short scale) {
+    this.scale = scale;
+    this.signum = (byte) bigInt.compareTo(BigInteger.ZERO);
+    if (signum < 0) {
+      unscaledValue.updateBigInteger(bigInt.negate());
+    } else {
+      unscaledValue.updateBigInteger(bigInt);
+    }
+  }
+
+  /**
    * Updates the value of this object with the given string.
    *
    * @param str
diff --git common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java
index d71ebb3..1c45d37 100644
--- common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java
+++ common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java
@@ -117,11 +117,7 @@ public UnsignedInt128(UnsignedInt128 o) {
    *          v3
    */
  public UnsignedInt128(int v0, int v1, int v2, int v3) {
-    this.v[0] = v0;
-    this.v[1] = v1;
-    this.v[2] = v2;
-    this.v[3] = v3;
-    updateCount();
+    update(v0, v1, v2, v3);
  }
 
  /**
@@ -158,6 +154,32 @@ public UnsignedInt128(char[] str, int offset, int length) {
    update(str, offset, length);
  }
 
+  /**
+   * Constructs from the given BigInteger
+   *
+   * @param bigInt
+   *          java BigInteger
+   */
+  public UnsignedInt128(BigInteger bigInt) {
+    updateBigInteger(bigInt);
+  }
+
+  /**
+   * Updates the value of this object from the given {@link BigInteger}.
+   *
+   * @param bigInt
+   *          java BigInteger
+   */
+  public void updateBigInteger(BigInteger bigInt) {
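+    // Take the four 32-bit words of the value, least significant first;
+    // UnsignedInt128 holds an unsigned magnitude, so bigInt is expected to be non-negative.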
+    int v0 = bigInt.intValue();
+    int v1 = bigInt.shiftRight(32).intValue();
+    int v2 = bigInt.shiftRight(64).intValue();
+    int v3 = bigInt.shiftRight(96).intValue();
+    update(v0, v1, v2, v3);
+  }
+
  /** @return v[0] */
  public int getV0() {
    return v[0];
diff --git common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java
index fbb2aa0..9ac68fe 100644
--- common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java
+++ common/src/test/org/apache/hadoop/hive/common/type/TestUnsignedInt128.java
@@ -547,4 +547,13 @@ public void testDivideDestructiveUnsignedInt128Again() {
    BigInteger ans = bigInteger1.divide(bigInteger2);
    assertEquals(ans, complicated1.toBigIntegerSlow());
  }
+
+  @Test
+  public void testBigIntConversion() {
+    BigInteger bigInteger = BigInteger.valueOf(0x1ABCDEF0123456L);
+    UnsignedInt128 uInt128 = new UnsignedInt128(bigInteger);
+    System.out.println("Out = "+uInt128.toString());
+    System.out.println("Out = "+bigInteger.toString());
+    assertEquals(bigInteger, uInt128.toBigIntegerSlow());
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index 23564bb..fe773b0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -18,7 +18,10 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 
 public class DecimalColumnVector extends ColumnVector {
@@ -37,6 +40,8 @@
  public short scale;
  public short precision;
 
+  private final HiveDecimalWritable writableObj = new HiveDecimalWritable();
+
  public DecimalColumnVector(int precision, int scale) {
    super(VectorizedRowBatch.DEFAULT_SIZE);
    this.precision = (short) precision;
@@ -50,8 +55,17 @@ public DecimalColumnVector(int precision, int scale) {
 
  @Override
  public Writable getWritableObject(int index) {
-    // TODO Auto-generated method stub
-    return null;
+    if (isRepeating) {
+      index = 0;
+    }
+    if (!noNulls && isNull[index]) {
+      return NullWritable.get();
+    } else {
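+      // Non-null value: convert the Decimal128 through BigDecimal and set it on the reused writable.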
+      Decimal128 dec = vector[index];
+      writableObj.set(HiveDecimal.create(dec.toBigDecimal()));
+      return writableObj;
+    }
  }
 
  @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 0876bf7..c7695ce 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -19,6 +19,7 @@
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.sql.Date;
 import java.sql.Timestamp;
@@ -33,12 +34,10 @@
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.Decimal128;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.common.type.UnsignedInt128;
+import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -1043,6 +1042,7 @@ void skipRows(long items) throws IOException {
  private static class DecimalTreeReader extends TreeReader{
    private InStream valueStream;
    private IntegerReader scaleStream = null;
+    private LongColumnVector scratchScaleVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    private final int precision;
    private final int scale;
 
@@ -1093,8 +1093,38 @@ Object next(Object previous) throws IOException {
 
    @Override
    Object nextVector(Object previousVector, long batchSize) throws IOException {
-      throw new UnsupportedOperationException(
-          "NextVector is not supported operation for Decimal type");
+      DecimalColumnVector result = null;
+      if (previousVector == null) {
+        result = new DecimalColumnVector(precision, scale);
+      } else {
+        result = (DecimalColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+      boolean [] scratchIsNull = scratchScaleVector.isNull;
+      scratchScaleVector.isNull = result.isNull;
+
+      // Read value entries based on isNull entries
+      if (result.isRepeating) {
+        if (!result.isNull[0]) {
+          BigInteger bInt = SerializationUtils.readBigInteger(valueStream);
+          short scale = (short) scaleStream.next();
+          result.vector[0].updateBigInteger(bInt, scale);
+        }
+      } else {
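+        // scratchScaleVector shares result's isNull (set above) so the scale reader sees the same null positions.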
+        scaleStream.nextVector(scratchScaleVector, batchSize);
+        for (int i = 0; i < batchSize; i++) {
+          if (!result.isNull[i]) {
+            BigInteger bInt = SerializationUtils.readBigInteger(valueStream);
+            result.vector[i].updateBigInteger(bInt, (short) scratchScaleVector.vector[i]);
+          }
+        }
+      }
+      // Switch back the null vector.
+      scratchScaleVector.isNull = scratchIsNull;
+      return result;
    }
 
    @Override
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index 0d5b7ff..b2291e1 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -22,8 +22,11 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.BooleanWritable;
@@ -71,9 +74,10 @@ public void openFileSystem() throws Exception {
    private final String k;
    private final Timestamp t;
    private final Date dt;
+    private final HiveDecimal hd;
 
    MyRecord(Boolean bo, Byte by, Integer i, Long l, Short s, Double d, String k, Timestamp t,
-        Date dt) {
+        Date dt, HiveDecimal hd) {
      this.bo = bo;
      this.by = by;
      this.i = i;
@@ -83,6 +87,7 @@ public void openFileSystem() throws Exception {
      this.k = k;
      this.t = t;
      this.dt = dt;
+      this.hd = hd;
    }
  }
 
@@ -109,13 +114,15 @@ public void createFile() throws Exception {
        "Heaven,", "we", "were", "all", "going", "direct", "the", "other", "way"};
    String[] dates = new String[] {"1991-02-28", "1970-01-31", "1950-04-23"};
+    String[] decimalStrings = new String[] {"234.443", "10001000", "0.3333367", "67788798.0", "-234.443",
+        "-10001000", "-0.3333367", "-67788798.0", "0"};
    for (int i = 0; i < 21000; ++i) {
      if ((i % 7) != 0) {
        writer.addRow(new MyRecord(((i % 3) == 0), (byte)(i % 5), i, (long) 200,
            (short) (300 + i), (double) (400 + i), words[r1.nextInt(words.length)],
            new Timestamp(Calendar.getInstance().getTime().getTime()),
-            Date.valueOf(dates[i % 3])));
+            Date.valueOf(dates[i % 3]), HiveDecimal.create(decimalStrings[i % decimalStrings.length])));
      } else {
-        writer.addRow(new MyRecord(null, null, i, (long) 200, null, null, null, null, null));
+        writer.addRow(new MyRecord(null, null, i, (long) 200, null, null, null, null, null, null));
      }
    }
    writer.close();
@@ -162,6 +169,14 @@ private void checkVectorizedReader() throws Exception {
          Assert.assertEquals(adt.getTime(), DateWritable.daysToMillis((int) ((LongWritable) b).get()));
          continue;
        }
+
+        // Decimals are stored as BigInteger, so convert and compare
+        if (a instanceof HiveDecimal) {
+          HiveDecimalWritable dec = (HiveDecimalWritable) b;
+          System.out.println("row = "+a.toString()+", vec="+dec.toString());
+          Assert.assertEquals(a, dec.getHiveDecimal());
+        }
+
        if (null == a) {
          Assert.assertEquals(true, (b == null || (b instanceof NullWritable)));
        } else {
@@ -180,6 +195,7 @@ private void checkVectorizedReader() throws Exception {
      Assert.assertEquals(false, batch.cols[6].isRepeating);
      Assert.assertEquals(false, batch.cols[7].isRepeating);
      Assert.assertEquals(false, batch.cols[8].isRepeating);
+      Assert.assertEquals(false, batch.cols[9].isRepeating);
 
      // Check non null
      Assert.assertEquals(false, batch.cols[0].noNulls);
@@ -191,6 +207,7 @@ private void checkVectorizedReader() throws Exception {
      Assert.assertEquals(false, batch.cols[6].noNulls);
      Assert.assertEquals(false, batch.cols[7].noNulls);
      Assert.assertEquals(false, batch.cols[8].noNulls);
+      Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    Assert.assertEquals(false, rr.hasNext());
  }