diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 5af6bf7..a473111 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -36,10 +36,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -128,6 +130,48 @@ } } + public static class AllTypesRow { + Boolean boolean1; + Byte byte1; + Short short1; + Integer int1; + Long long1; + Float float1; + Double double1; + BytesWritable bytes1; + Text string1; + MiddleStruct middle; + List list = new ArrayList(); + Map map = new HashMap(); + Timestamp ts; + HiveDecimal decimal1; + + AllTypesRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1, + Double d1, + BytesWritable b3, String s2, MiddleStruct m1, + List l2, Map m2, + Timestamp ts1, HiveDecimal decimal) { + this.boolean1 = b1; + this.byte1 = b2; + this.short1 = s1; + this.int1 = i1; + this.long1 = l1; + this.float1 = f1; + this.double1 = d1; + this.bytes1 = b3; + if (s2 == null) { + this.string1 = null; + } else { + this.string1 = new Text(s2); + } + this.middle = m1; + this.list = l2; + this.map = m2; + this.ts = ts1; + this.decimal1 = decimal; + } + } + private static InnerStruct inner(int i, String s) { return new InnerStruct(i, s); } @@ -185,6 +229,290 @@ public void openFileSystem () throws Exception { } @Test + public void testWriteFormat_0_11() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory + .getReflectionObjectInspector(AllTypesRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + conf.set("hive.exec.orc.write.format", "0.11"); + Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, + 100000, CompressionKind.NONE, 10000, 10000); + for(int i = 0; i < 7500; i++) { + if (i % 2 == 0) { + writer.addRow(new AllTypesRow(false, (byte) 1, (short) 1024, 65536, + Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi", + new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list( + inner(3, "good"), inner(4, "bad")), map(), Timestamp + .valueOf("2000-03-12 15:00:00"), new HiveDecimal( + "12345678.6547456"))); + } else { + writer.addRow(new AllTypesRow(true, (byte) 100, (short) 2048, 65536, + Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye", + new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list( + inner(100000000, "cat"), inner(-100000, "in"), + inner(1234, "hat")), + map(inner(5, "chani"), inner(1, "mauddib")), Timestamp + .valueOf("2000-03-12 15:00:01"), new HiveDecimal( + "12345678.6547457"))); + } + } + writer.close(); + } + + @Test + public void testReadFormat_0_11() throws Exception { + Path resourceDir = new Path(System.getProperty("test.build.resources", "ql" + + File.separator + "src" + File.separator + "test" + File.separator + + "resources")); + Path oldFilePath = new Path(resourceDir, "orc-file-11-format.orc"); + Reader reader = OrcFile.createReader(fs, oldFilePath); + + int stripeCount = 0; + int rowCount = 0; + long currentOffset = -1; + for(StripeInformation stripe : reader.getStripes()) { + stripeCount += 1; + rowCount += stripe.getNumberOfRows(); + if (currentOffset < 0) { + currentOffset = stripe.getOffset() + stripe.getIndexLength() + + stripe.getDataLength() + stripe.getFooterLength(); + } else { + assertEquals(currentOffset, stripe.getOffset()); + currentOffset += stripe.getIndexLength() + stripe.getDataLength() + + stripe.getFooterLength(); + } + } + assertEquals(reader.getNumberOfRows(), rowCount); + assertEquals(2, stripeCount); + + // check the stats + ColumnStatistics[] stats = reader.getStatistics(); + assertEquals(7500, stats[1].getNumberOfValues()); + assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount()); + assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount()); + assertEquals("count: 7500 true: 3750", stats[1].toString()); + + assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum()); + assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum()); + assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined()); + assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum()); + assertEquals("count: 7500 min: 1024 max: 2048 sum: 11520000", + stats[3].toString()); + + assertEquals(Long.MAX_VALUE, + ((IntegerColumnStatistics) stats[5]).getMaximum()); + assertEquals(Long.MAX_VALUE, + ((IntegerColumnStatistics) stats[5]).getMinimum()); + assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined()); + assertEquals( + "count: 7500 min: 9223372036854775807 max: 9223372036854775807", + stats[5].toString()); + + assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum()); + assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum()); + assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(), + 0.00001); + assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0", + stats[7].toString()); + + assertEquals("count: 7500 min: bye max: hi", stats[9].toString()); + + // check the inspectors + StructObjectInspector readerInspector = (StructObjectInspector) reader + .getObjectInspector(); + assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory()); + assertEquals("struct>>,list:array>," + + "map:map>,ts:timestamp," + + "decimal1:decimal>", readerInspector.getTypeName()); + List fields = readerInspector + .getAllStructFieldRefs(); + BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector + .getStructFieldRef("boolean1").getFieldObjectInspector(); + ByteObjectInspector by = (ByteObjectInspector) readerInspector + .getStructFieldRef("byte1").getFieldObjectInspector(); + ShortObjectInspector sh = (ShortObjectInspector) readerInspector + .getStructFieldRef("short1").getFieldObjectInspector(); + IntObjectInspector in = (IntObjectInspector) readerInspector + .getStructFieldRef("int1").getFieldObjectInspector(); + LongObjectInspector lo = (LongObjectInspector) readerInspector + .getStructFieldRef("long1").getFieldObjectInspector(); + FloatObjectInspector fl = (FloatObjectInspector) readerInspector + .getStructFieldRef("float1").getFieldObjectInspector(); + DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector + .getStructFieldRef("double1").getFieldObjectInspector(); + BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector + .getStructFieldRef("bytes1").getFieldObjectInspector(); + StringObjectInspector st = (StringObjectInspector) readerInspector + .getStructFieldRef("string1").getFieldObjectInspector(); + StructObjectInspector mid = (StructObjectInspector) readerInspector + .getStructFieldRef("middle").getFieldObjectInspector(); + List midFields = mid.getAllStructFieldRefs(); + ListObjectInspector midli = (ListObjectInspector) midFields.get(0) + .getFieldObjectInspector(); + StructObjectInspector inner = (StructObjectInspector) midli + .getListElementObjectInspector(); + List inFields = inner.getAllStructFieldRefs(); + ListObjectInspector li = (ListObjectInspector) readerInspector + .getStructFieldRef("list").getFieldObjectInspector(); + MapObjectInspector ma = (MapObjectInspector) readerInspector + .getStructFieldRef("map").getFieldObjectInspector(); + TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector + .getStructFieldRef("ts").getFieldObjectInspector(); + HiveDecimalObjectInspector dco = (HiveDecimalObjectInspector) readerInspector + .getStructFieldRef("decimal1").getFieldObjectInspector(); + StringObjectInspector mk = (StringObjectInspector) ma + .getMapKeyObjectInspector(); + RecordReader rows = reader.rows(null); + Object row = rows.next(null); + assertNotNull(row); + // check the contents of the first row + assertEquals(false, + bo.get(readerInspector.getStructFieldData(row, fields.get(0)))); + assertEquals(1, + by.get(readerInspector.getStructFieldData(row, fields.get(1)))); + assertEquals(1024, + sh.get(readerInspector.getStructFieldData(row, fields.get(2)))); + assertEquals(65536, + in.get(readerInspector.getStructFieldData(row, fields.get(3)))); + assertEquals(Long.MAX_VALUE, + lo.get(readerInspector.getStructFieldData(row, fields.get(4)))); + assertEquals(1.0, + fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001); + assertEquals(-15.0, + dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), + 0.00001); + assertEquals(bytes(0, 1, 2, 3, 4), + bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(7)))); + assertEquals("hi", st.getPrimitiveJavaObject(readerInspector + .getStructFieldData(row, fields.get(8)))); + List midRow = midli.getList(mid.getStructFieldData( + readerInspector.getStructFieldData(row, fields.get(9)), + midFields.get(0))); + assertNotNull(midRow); + assertEquals(2, midRow.size()); + assertEquals(1, + in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0)))); + assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(0), inFields.get(1)))); + assertEquals(2, + in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0)))); + assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(1), inFields.get(1)))); + List list = li.getList(readerInspector.getStructFieldData(row, + fields.get(10))); + assertEquals(2, list.size()); + assertEquals(3, + in.get(inner.getStructFieldData(list.get(0), inFields.get(0)))); + assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(0), inFields.get(1)))); + assertEquals(4, + in.get(inner.getStructFieldData(list.get(1), inFields.get(0)))); + assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(1), inFields.get(1)))); + Map map = ma.getMap(readerInspector.getStructFieldData(row, + fields.get(11))); + assertEquals(0, map.size()); + assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"), + tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(12)))); + assertEquals(new HiveDecimal("12345678.6547456"), + dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(13)))); + + // check the contents of second row + assertEquals(true, rows.hasNext()); + rows.seekToRow(7499); + row = rows.next(null); + assertEquals(true, + bo.get(readerInspector.getStructFieldData(row, fields.get(0)))); + assertEquals(100, + by.get(readerInspector.getStructFieldData(row, fields.get(1)))); + assertEquals(2048, + sh.get(readerInspector.getStructFieldData(row, fields.get(2)))); + assertEquals(65536, + in.get(readerInspector.getStructFieldData(row, fields.get(3)))); + assertEquals(Long.MAX_VALUE, + lo.get(readerInspector.getStructFieldData(row, fields.get(4)))); + assertEquals(2.0, + fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001); + assertEquals(-5.0, + dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), + 0.00001); + assertEquals(bytes(), bi.getPrimitiveWritableObject(readerInspector + .getStructFieldData(row, fields.get(7)))); + assertEquals("bye", st.getPrimitiveJavaObject(readerInspector + .getStructFieldData(row, fields.get(8)))); + midRow = midli.getList(mid.getStructFieldData( + readerInspector.getStructFieldData(row, fields.get(9)), + midFields.get(0))); + assertNotNull(midRow); + assertEquals(2, midRow.size()); + assertEquals(1, + in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0)))); + assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(0), inFields.get(1)))); + assertEquals(2, + in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0)))); + assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(1), inFields.get(1)))); + list = li.getList(readerInspector.getStructFieldData(row, fields.get(10))); + assertEquals(3, list.size()); + assertEquals(100000000, + in.get(inner.getStructFieldData(list.get(0), inFields.get(0)))); + assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(0), inFields.get(1)))); + assertEquals(-100000, + in.get(inner.getStructFieldData(list.get(1), inFields.get(0)))); + assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(1), inFields.get(1)))); + assertEquals(1234, + in.get(inner.getStructFieldData(list.get(2), inFields.get(0)))); + assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(2), inFields.get(1)))); + map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11))); + assertEquals(2, map.size()); + boolean[] found = new boolean[2]; + for(Object key : map.keySet()) { + String str = mk.getPrimitiveJavaObject(key); + if (str.equals("chani")) { + assertEquals(false, found[0]); + assertEquals(5, + in.get(inner.getStructFieldData(map.get(key), inFields.get(0)))); + assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData( + map.get(key), inFields.get(1)))); + found[0] = true; + } else if (str.equals("mauddib")) { + assertEquals(false, found[1]); + assertEquals(1, + in.get(inner.getStructFieldData(map.get(key), inFields.get(0)))); + assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData( + map.get(key), inFields.get(1)))); + found[1] = true; + } else { + throw new IllegalArgumentException("Unknown key " + str); + } + } + assertEquals(true, found[0]); + assertEquals(true, found[1]); + assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"), + tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(12)))); + assertEquals(new HiveDecimal("12345678.6547457"), + dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(13)))); + + // handle the close up + assertEquals(false, rows.hasNext()); + rows.close(); + } + + @Test public void test1() throws Exception { ObjectInspector inspector; synchronized (TestOrcFile.class) {