diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 0e19e59..ffd7ef2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -18,16 +18,20 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.sql.Timestamp; import java.util.List; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; @@ -88,6 +92,28 @@ public static void AddRowToBatch(Object row, StructObjectInspector oi, int rowIn // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for // float/double. String types have no default value for null. switch (poi.getPrimitiveCategory()) { + case BOOLEAN: { + LongColumnVector lcv = (LongColumnVector) batch.cols[i]; + if (writableCol != null) { + lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0; + lcv.isNull[rowIndex] = false; + } else { + lcv.vector[rowIndex] = 1; + SetNullColIsNullValue(lcv, rowIndex); + } + } + break; + case BYTE: { + LongColumnVector lcv = (LongColumnVector) batch.cols[i]; + if (writableCol != null) { + lcv.vector[rowIndex] = ((ByteWritable) writableCol).get(); + lcv.isNull[rowIndex] = false; + } else { + lcv.vector[rowIndex] = 1; + SetNullColIsNullValue(lcv, rowIndex); + } + } + break; case SHORT: { LongColumnVector lcv = (LongColumnVector) batch.cols[i]; if (writableCol != null) { @@ -143,6 +169,18 @@ public static void AddRowToBatch(Object row, StructObjectInspector oi, int rowIn } } break; + case TIMESTAMP: { + LongColumnVector lcv = (LongColumnVector) batch.cols[i]; + if (writableCol != null) { + Timestamp t = ((TimestampWritable) writableCol).getTimestamp(); + lcv.vector[rowIndex] = (t.getTime() * 1000000) + (t.getNanos() % 1000000); + lcv.isNull[rowIndex] = false; + } else { + lcv.vector[rowIndex] = 1; + SetNullColIsNullValue(lcv, rowIndex); + } + } + break; case STRING: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[i]; if (writableCol != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java index 676b86a..3725452 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.nio.ByteBuffer; +import java.sql.Timestamp; import java.util.List; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -28,7 +29,9 @@ import org.apache.hadoop.hive.serde2.SerDeStats; import 
org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -121,6 +124,18 @@ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspe } switch (poi.getPrimitiveCategory()) { + case BOOLEAN: { + LongColumnVector lcv = (LongColumnVector) batch.cols[k]; + // In vectorization true is stored as 1 and false as 0 + boolean b = lcv.vector[rowIndex] == 1 ? true : false; + if (b) { + serializeVectorStream.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length); + } else { + serializeVectorStream.write(LazyUtils.falseBytes, 0, LazyUtils.falseBytes.length); + } + } + break; + case BYTE: case SHORT: case INT: case LONG: @@ -141,6 +156,16 @@ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspe serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams .getNeedsEscape()); break; + case TIMESTAMP: + LongColumnVector tcv = (LongColumnVector) batch.cols[k]; + long timeInNanoSec = tcv.vector[rowIndex]; + Timestamp t = new Timestamp(0); + t.setTime((timeInNanoSec)/1000000); + t.setNanos((int)((t.getNanos()) + (timeInNanoSec % 1000000))); + TimestampWritable tw = new TimestampWritable(); + tw.set(t); + LazyTimestamp.writeUTF8(serializeVectorStream, tw); + break; default: throw new UnsupportedOperationException( "Vectorizaton is not supported for datatype:" diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 1a57766..5018ea1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -197,11 +197,14 @@ public VectorizedRowBatch CreateVectorizedRowBatch() throws HiveException case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; // Vectorization currently only supports the following data types: - // SHORT, INT, LONG, FLOAT, DOUBLE, STRING + // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING and TIMESTAMP switch (poi.getPrimitiveCategory()) { + case BOOLEAN: + case BYTE: case SHORT: case INT: case LONG: + case TIMESTAMP: result.cols[j] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; case FLOAT: @@ -232,9 +235,6 @@ public VectorizedRowBatch CreateVectorizedRowBatch() throws HiveException return result; } - - - /** * Adds the row to the batch after deserializing the row * diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java index 11c3fdf..16c063f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java @@ -20,8 +20,10 @@ import java.io.EOFException; import java.io.IOException; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + class BitFieldReader { - private RunLengthByteReader input; + private final RunLengthByteReader input; private final int bitSize; private int current; private int bitsLeft; @@ -60,6 +62,23 @@ int next() throws IOException { return result &
mask; } + void nextVector(LongColumnVector previous, long previousLen) + throws IOException { + previous.isRepeating = true; + for (int i = 0; i < previousLen; i++) { + if (!previous.isNull[i]) { + previous.vector[i] = next(); + } else { + // The default value of null for int types in vectorized + // processing is 1, so set that if the value is null + previous.vector[i] = 1; + } + if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) { + previous.isRepeating = false; + } + } + } + void seek(PositionProvider index) throws IOException { input.seek(index); int consumed = (int) index.getNext(); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index f15779d..c29d97f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -240,8 +240,19 @@ Object next(Object previous) throws IOException { @Override Object nextVector(Object previousVector, long batchSize) throws IOException { - throw new UnsupportedOperationException( - "NextVector is not supported operation on Boolean type"); + LongColumnVector result = null; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; } } @@ -284,8 +295,19 @@ Object next(Object previous) throws IOException { @Override Object nextVector(Object previousVector, long batchSize) throws IOException { - throw new UnsupportedOperationException( - "NextVector is not supported operation for Byte type"); + LongColumnVector result = null; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; } @Override @@ -708,6 +730,7 @@ void skipRows(long items) throws IOException { private static class TimestampTreeReader extends TreeReader{ private RunLengthIntegerReader data; private RunLengthIntegerReader nanos; + private final LongColumnVector nanoVector = new LongColumnVector(); TimestampTreeReader(int columnId) { super(columnId); @@ -758,8 +781,47 @@ Object next(Object previous) throws IOException { @Override Object nextVector(Object previousVector, long batchSize) throws IOException { - throw new UnsupportedOperationException( - "NextVector is not supported operation for TimeStamp type"); + LongColumnVector result = null; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + data.nextVector(result, batchSize); + nanoVector.isNull = result.isNull; + nanos.nextVector(nanoVector, batchSize); + + if (!(result.isRepeating && nanoVector.isRepeating)) { + + // Non-repeating values present in the vector. Iterate through the vector and populate the time + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = (result.vector[i] + WriterImpl.BASE_TIMESTAMP) + * WriterImpl.MILLIS_PER_SECOND; + nanoVector.vector[i] = parseNanos(nanoVector.vector[i]); + // fix the rounding when we divided by 1000.
+ if (result.vector[i] >= 0) { + result.vector[i] += nanoVector.vector[i] / 1000000; + } else { + result.vector[i] -= nanoVector.vector[i] / 1000000; + } + // Convert millis into nanos and add the nano vector value to it + result.vector[i] = (result.vector[i] * 1000000) + + nanoVector.vector[i]; + } + } + } else { + // Entire vector has repeating values, so convert only the first entry the same way + if (!result.isNull[0]) { + result.vector[0] = (result.vector[0] + WriterImpl.BASE_TIMESTAMP) + * WriterImpl.MILLIS_PER_SECOND; + nanoVector.vector[0] = parseNanos(nanoVector.vector[0]); + if (result.vector[0] >= 0) { + result.vector[0] += nanoVector.vector[0] / 1000000; + } else { + result.vector[0] -= nanoVector.vector[0] / 1000000; + } + result.vector[0] = (result.vector[0] * 1000000) + + nanoVector.vector[0]; + } + } + return result; } private static int parseNanos(long serialized) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java index 678a2f5..23d54e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java @@ -20,6 +20,8 @@ import java.io.EOFException; import java.io.IOException; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + /** * A reader that reads a sequence of bytes. A control byte is read before * each run with positive values 0 to 127 meaning 3 to 130 repetitions. If the @@ -82,6 +84,23 @@ byte next() throws IOException { return result; } + void nextVector(LongColumnVector previous, long previousLen) + throws IOException { + previous.isRepeating = true; + for (int i = 0; i < previousLen; i++) { + if (!previous.isNull[i]) { + previous.vector[i] = next(); + } else { + // The default value of null for int types in vectorized + // processing is 1, so set that if the value is null + previous.vector[i] = 1; + } + if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) { + previous.isRepeating = false; + } + } + } + void seek(PositionProvider index) throws IOException { input.seek(index); int consumed = (int) index.getNext(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java index 90d3134..78ebb17 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java @@ -20,6 +20,8 @@ import java.io.File; import java.io.IOException; +import java.sql.Timestamp; +import java.util.Calendar; import java.util.List; import java.util.Properties; @@ -36,13 +38,16 @@ import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; @@ -82,10 +87,10 @@ private void InitSerde() { // Set the configuration parameters tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "6"); tbl.setProperty("columns", -
"ashort,aint,along,adouble,afloat,astring"); + "ashort,aint,along,adouble,afloat,astring,abyte,aboolean,atimestamp"); tbl.setProperty("columns.types", - "smallint:int:bigint:double:float:string"); - colCount = 6; + "smallint:int:bigint:double:float:string:tinyint:boolean:timestamp"); + colCount = 9; tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); try { @@ -109,7 +114,8 @@ private void WriteRCFile(FileSystem fs, Path file, Configuration conf) BytesRefArrayWritable bytes = new BytesRefArrayWritable(colCount); BytesRefWritable cu; - if (i % 3 != 0) { + if (i % 3 != 0) { + //if (i < 100) { cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length); bytes.set(0, cu); @@ -132,6 +138,20 @@ private void WriteRCFile(FileSystem fs, Path file, Configuration conf) cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0, ("Test string").getBytes("UTF-8").length); bytes.set(5, cu); + + cu = new BytesRefWritable((1 + "").getBytes("UTF-8"), 0, + (1 + "").getBytes("UTF-8").length); + bytes.set(6, cu); + + cu = new BytesRefWritable(("true").getBytes("UTF-8"), 0, + ("true").getBytes("UTF-8").length); + bytes.set(7, cu); + + Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime()); + cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0, + t.toString().getBytes("UTF-8").length); + bytes.set(8, cu); + } else { cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length); bytes.set(0, cu); @@ -151,6 +171,19 @@ private void WriteRCFile(FileSystem fs, Path file, Configuration conf) cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0, ("Test string").getBytes("UTF-8").length); bytes.set(5, cu); + + cu = new BytesRefWritable(new byte[0], 0, 0); + bytes.set(6, cu); + + cu = new BytesRefWritable(new byte[0], 0, 0); + bytes.set(7, cu); + +// cu = new BytesRefWritable(new byte[0], 0, 0); +// bytes.set(8, cu); + Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime()); + cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0, + t.toString().getBytes("UTF-8").length); + bytes.set(8, cu); } writer.append(bytes); } @@ -166,7 +199,7 @@ private VectorizedRowBatch GetRowBatch() throws SerDeException, HiveException, I .getObjectInspector(); List fieldRefs = oi.getAllStructFieldRefs(); - Assert.assertEquals("Field size should be 6", colCount, fieldRefs.size()); + Assert.assertEquals("Field size should be 9", colCount, fieldRefs.size()); // Create the context VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx(oi, oi, serDe, null); @@ -213,6 +246,17 @@ void ValidateRowBatch(VectorizedRowBatch batch) throws IOException, SerDeExcepti Object writableCol = poi.getPrimitiveWritableObject(fieldData); if (writableCol != null) { switch (poi.getPrimitiveCategory()) { + case BOOLEAN: { + LongColumnVector lcv = (LongColumnVector) batch.cols[j]; + Assert.assertEquals(true, lcv.vector[i] == (((BooleanWritable) writableCol).get() ? 
1 + : 0)); + } + break; + case BYTE: { + LongColumnVector lcv = (LongColumnVector) batch.cols[j]; + Assert.assertEquals(true, lcv.vector[i] == (long) ((ByteWritable) writableCol).get()); + } + break; case SHORT: { LongColumnVector lcv = (LongColumnVector) batch.cols[j]; Assert.assertEquals(true, lcv.vector[i] == ((ShortWritable) writableCol).get()); @@ -247,6 +291,13 @@ void ValidateRowBatch(VectorizedRowBatch batch) throws IOException, SerDeExcepti Assert.assertEquals(true, a.equals(b)); } break; + case TIMESTAMP: { + LongColumnVector tcv = (LongColumnVector) batch.cols[j]; + Timestamp t = ((TimestampWritable) writableCol).getTimestamp(); + long timeInNanoSec = (t.getTime() * 1000000) + (t.getNanos() % 1000000); + Assert.assertEquals(true, tcv.vector[i] == timeInNanoSec); + } + break; default: Assert.assertEquals("Unknown type", false); } @@ -275,18 +326,18 @@ void ValidateRowBatch(VectorizedRowBatch batch) throws IOException, SerDeExcepti @Test public void TestCtx() throws Exception { - InitSerde(); - WriteRCFile(this.fs, this.testFilePath, this.conf); - VectorizedRowBatch batch = GetRowBatch(); - ValidateRowBatch(batch); - - // Test VectorizedColumnarSerDe - VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe(); - vcs.initialize(this.conf, tbl); - Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe - .getObjectInspector()); - BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[])((ObjectWritable)w).get(); - vcs.deserializeVector(refArray, 10, batch); - ValidateRowBatch(batch); + InitSerde(); + WriteRCFile(this.fs, this.testFilePath, this.conf); + VectorizedRowBatch batch = GetRowBatch(); + ValidateRowBatch(batch); + + // Test VectorizedColumnarSerDe + VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe(); + vcs.initialize(this.conf, tbl); + Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe + .getObjectInspector()); + BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[]) ((ObjectWritable) w).get(); + vcs.deserializeVector(refArray, 10, batch); + ValidateRowBatch(batch); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java index e8c75c6..cf264f2 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.io.orc; import java.io.File; +import java.sql.Timestamp; +import java.util.Calendar; import java.util.Random; import junit.framework.Assert; @@ -29,8 +31,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; import org.junit.Before; import org.junit.Test; @@ -58,18 +60,24 @@ public void openFileSystem() throws Exception { } static class MyRecord { + private final Boolean bo; + private final Byte by; private final Integer i; private final Long l; private final Short s; private final Double d; private final String k; + private final Timestamp t; - MyRecord(Integer i, Long l, Short s, Double d, String k) { + MyRecord(Boolean bo, Byte by, Integer i, Long l, Short s, Double d, String k, Timestamp t) { + this.bo = bo; + this.by = by; this.i = i; this.l = l; this.s = s; this.d = d; this.k 
= k; + this.t = t; } } @@ -96,11 +104,11 @@ public void createFile() throws Exception { "Heaven,", "we", "were", "all", "going", "direct", "the", "other", "way"}; for (int i = 0; i < 21000; ++i) { - if ((i % 3) != 0) { - writer.addRow(new MyRecord(i, (long) 200, (short) (300 + i), (double) (400 + i), - words[r1.nextInt(words.length)])); + if ((i % 7) != 0) { + writer.addRow(new MyRecord(((i % 3) == 0), (byte)(i % 5), i, (long) 200, (short) (300 + i), (double) (400 + i), + words[r1.nextInt(words.length)], new Timestamp(Calendar.getInstance().getTime().getTime()))); } else { - writer.addRow(new MyRecord(i, (long) 200, null, null, null)); + writer.addRow(new MyRecord(null, null, i, (long) 200, null, null, null, null)); } } writer.close(); @@ -122,8 +130,21 @@ private void checkVectorizedReader() throws Exception { for (int i = 0; i < batch.size; i++) { row = (OrcStruct) rr.next((Object) row); for (int j = 0; j < batch.cols.length; j++) { - Object a = ((Writable) row.getFieldValue(j)); + Object a = (row.getFieldValue(j)); Object b = batch.cols[j].getWritableObject(i); + // Boolean values are stored as 1's and 0's, so convert and compare + if (a instanceof BooleanWritable) { + Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0); + Assert.assertEquals(true, temp.toString().equals(b.toString())); + continue; + } + // Timestamps are stored as longs, so convert and compare + if (a instanceof Timestamp) { + Timestamp t = ((Timestamp) a); + Long timeInNanoSec = (t.getTime() * 1000000) + t.getNanos(); + Assert.assertEquals(true, timeInNanoSec.toString().equals(b.toString())); + continue; + } if (null == a) { Assert.assertEquals(true, (b == null || (b instanceof NullWritable))); } else { @@ -134,17 +155,23 @@ private void checkVectorizedReader() throws Exception { // Check repeating Assert.assertEquals(false, batch.cols[0].isRepeating); - Assert.assertEquals(true, batch.cols[1].isRepeating); + Assert.assertEquals(false, batch.cols[1].isRepeating); Assert.assertEquals(false, batch.cols[2].isRepeating); - Assert.assertEquals(false, batch.cols[3].isRepeating); + Assert.assertEquals(true, batch.cols[3].isRepeating); Assert.assertEquals(false, batch.cols[4].isRepeating); + Assert.assertEquals(false, batch.cols[5].isRepeating); + Assert.assertEquals(false, batch.cols[6].isRepeating); + Assert.assertEquals(false, batch.cols[7].isRepeating); // Check non null - Assert.assertEquals(true, batch.cols[0].noNulls); - Assert.assertEquals(true, batch.cols[1].noNulls); - Assert.assertEquals(false, batch.cols[2].noNulls); - Assert.assertEquals(false, batch.cols[3].noNulls); + Assert.assertEquals(false, batch.cols[0].noNulls); + Assert.assertEquals(false, batch.cols[1].noNulls); + Assert.assertEquals(true, batch.cols[2].noNulls); + Assert.assertEquals(true, batch.cols[3].noNulls); Assert.assertEquals(false, batch.cols[4].noNulls); + Assert.assertEquals(false, batch.cols[5].noNulls); + Assert.assertEquals(false, batch.cols[6].noNulls); + Assert.assertEquals(false, batch.cols[7].noNulls); } Assert.assertEquals(false, rr.hasNext()); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 6958284..2f4530e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -127,8 +127,8 @@ public static String convertToString(byte[] bytes, int start, int length) { } } - private static byte[] trueBytes = {(byte) 't', 'r', 'u', 'e'}; - private static
byte[] falseBytes = {(byte) 'f', 'a', 'l', 's', 'e'}; + public static final byte[] trueBytes = {(byte) 't', 'r', 'u', 'e'}; + public static final byte[] falseBytes = {(byte) 'f', 'a', 'l', 's', 'e'}; /** * Write the bytes with special characters escaped.
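
For reference, the Timestamp-to-long convention used above (VectorizedBatchUtil.AddRowToBatch packs the value into a LongColumnVector, VectorizedColumnarSerDe.serializeVector unpacks it) can be exercised outside Hive. The following is a minimal, self-contained sketch of that round trip; the class and method names (TimestampVectorCodec, encode, decode) are illustrative and not part of the patch.

    import java.sql.Timestamp;

    // Minimal sketch of the Timestamp <-> long round trip the patch uses:
    // the packed value is getTime() (milliseconds) scaled to nanoseconds plus
    // the sub-millisecond remainder of getNanos().
    public class TimestampVectorCodec {

      // Mirrors VectorizedBatchUtil: getTime() already carries the millisecond
      // part of getNanos(), so only the remainder below one millisecond is added.
      static long encode(Timestamp t) {
        return (t.getTime() * 1000000L) + (t.getNanos() % 1000000);
      }

      // Mirrors VectorizedColumnarSerDe: restore the milliseconds first, then put
      // the sub-millisecond nanoseconds back on top of what setTime() implied.
      // Like the patch, this assumes non-negative epoch milliseconds.
      static Timestamp decode(long timeInNanoSec) {
        Timestamp t = new Timestamp(0);
        t.setTime(timeInNanoSec / 1000000);
        t.setNanos((int) (t.getNanos() + (timeInNanoSec % 1000000)));
        return t;
      }

      public static void main(String[] args) {
        Timestamp in = new Timestamp(System.currentTimeMillis());
        in.setNanos(123456789);
        long packed = encode(in);
        Timestamp out = decode(packed);
        System.out.println(in + " -> " + packed + " -> " + out);
      }
    }

Because getTime() already includes the millisecond portion of getNanos(), the packed value works out to nanoseconds since the epoch for non-negative times, which is why the round trip is lossless.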
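The nextVector() methods added to BitFieldReader and RunLengthByteReader share one control-flow pattern: fill the vector while honoring the isNull mask (nulls default to 1 for long vectors), and keep isRepeating true only while adjacent values stay equal. A standalone sketch of just that loop, with hypothetical names (RepeatDetectingFill, fillVector) and a LongSupplier standing in for the ORC stream, is shown below.

    import java.util.function.LongSupplier;

    // Standalone sketch of the null-aware fill loop the patch adds to the ORC
    // bit/byte readers; only the control flow mirrors the patch, the names are
    // hypothetical.
    public class RepeatDetectingFill {

      static boolean fillVector(long[] vector, boolean[] isNull, int n, LongSupplier source) {
        boolean isRepeating = true;
        for (int i = 0; i < n; i++) {
          if (!isNull[i]) {
            vector[i] = source.getAsLong();   // next decoded value from the stream
          } else {
            vector[i] = 1;                    // vectorization's default for null longs
          }
          // once two adjacent slots differ, the column can no longer be treated
          // as a single repeated value
          if (isRepeating && i > 0 && vector[i - 1] != vector[i]) {
            isRepeating = false;
          }
        }
        return isRepeating;
      }

      public static void main(String[] args) {
        long[] vector = new long[4];
        boolean[] isNull = {false, true, false, false};
        // a constant source of 1s plus the null default of 1 keeps the column repeating
        System.out.println(fillVector(vector, isNull, 4, () -> 1L));           // true
        // a varying source breaks the repetition
        long[] counter = {0};
        System.out.println(fillVector(vector, isNull, 4, () -> counter[0]++)); // false
      }
    }

Keeping isRepeating accurate matters because downstream vectorized operators use it to process only vector[0] instead of every row, which is what the new isRepeating assertions in TestVectorizedORCReader exercise.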