Index: build.xml
===================================================================
--- build.xml (revision 1440302)
+++ build.xml (working copy)
@@ -684,6 +684,7 @@
+
Index: contrib/ivy.xml
===================================================================
--- contrib/ivy.xml (revision 1440302)
+++ contrib/ivy.xml (working copy)
@@ -36,7 +36,19 @@
transitive="false"/>
+
+
+
+
+
+
+
Index: contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
===================================================================
--- contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (revision 0)
+++ contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (working copy)
@@ -0,0 +1,777 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import static junit.framework.Assert.*;
+
+/**
+ * Tests for the top level reader/streamFactory of ORC files.
+ */
+public class TestOrcFile {
+
+ public static class InnerStruct {
+ int int1;
+ Text string1 = new Text();
+ InnerStruct(int int1, String string1) {
+ this.int1 = int1;
+ this.string1.set(string1);
+ }
+ }
+
+ public static class MiddleStruct {
+ List<InnerStruct> list = new ArrayList<InnerStruct>();
+
+ MiddleStruct(InnerStruct... items) {
+ list.clear();
+ for(InnerStruct item: items) {
+ list.add(item);
+ }
+ }
+ }
+
+ public static class BigRow {
+ Boolean boolean1;
+ Byte byte1;
+ Short short1;
+ Integer int1;
+ Long long1;
+ Float float1;
+ Double double1;
+ BytesWritable bytes1;
+ Text string1;
+ MiddleStruct middle;
+ List<InnerStruct> list = new ArrayList<InnerStruct>();
+ Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
+
+ BigRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1,
+ Double d1,
+ BytesWritable b3, String s2, MiddleStruct m1,
+ List<InnerStruct> l2, Map<Text, InnerStruct> m2) {
+ this.boolean1 = b1;
+ this.byte1 = b2;
+ this.short1 = s1;
+ this.int1 = i1;
+ this.long1 = l1;
+ this.float1 = f1;
+ this.double1 = d1;
+ this.bytes1 = b3;
+ if (s2 == null) {
+ this.string1 = null;
+ } else {
+ this.string1 = new Text(s2);
+ }
+ this.middle = m1;
+ this.list = l2;
+ this.map = m2;
+ }
+ }
+
+ private static InnerStruct inner(int i, String s) {
+ return new InnerStruct(i, s);
+ }
+
+ private static Map<Text, InnerStruct> map(InnerStruct... items) {
+ Map<Text, InnerStruct> result = new HashMap<Text, InnerStruct>();
+ for(InnerStruct i: items) {
+ result.put(new Text(i.string1), i);
+ }
+ return result;
+ }
+
+ private static List<InnerStruct> list(InnerStruct... items) {
+ List<InnerStruct> result = new ArrayList<InnerStruct>();
+ for(InnerStruct s: items) {
+ result.add(s);
+ }
+ return result;
+ }
+
+ private static BytesWritable bytes(int... items) {
+ BytesWritable result = new BytesWritable();
+ result.setSize(items.length);
+ for(int i=0; i < items.length; ++i) {
+ result.getBytes()[i] = (byte) items[i];
+ }
+ return result;
+ }
+
+ private static ByteBuffer byteBuf(int... items) {
+ ByteBuffer result = ByteBuffer.allocate(items.length);
+ for(int item: items) {
+ result.put((byte) item);
+ }
+ return result;
+ }
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target/test/tmp"));
+
+ @Test
+ public void test1() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ ObjectInspector inspector =
+ ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ Path p = new Path(workDir, "file.orc");
+ fs.delete(p, false);
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 100000, CompressionKind.ZLIB, 10000, 10000);
+ writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
+ Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
+ new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+ list(inner(3, "good"), inner(4, "bad")),
+ map()));
+ writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
+ Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
+ new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+ list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
+ map(inner(5,"chani"), inner(1,"mauddib"))));
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, p);
+
+ // check the stats
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(2, stats[1].getNumberOfValues());
+ assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+ assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+ assertEquals("count: 2 true: 1", stats[1].toString());
+
+ assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+ assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+ assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+ assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
+ assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+ stats[3].toString());
+
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMaximum());
+ assertEquals(Long.MAX_VALUE,
+ ((IntegerColumnStatistics) stats[5]).getMinimum());
+ assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+ assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+ stats[5].toString());
+
+ assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
+ assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
+ assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
+ assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+ stats[7].toString());
+
+ assertEquals("count: 2 min: bye max: hi", stats[9].toString());
+
+ // check the inspectors
+ StructObjectInspector readerInspector =
+ (StructObjectInspector) reader.getObjectInspector();
+ assertEquals(ObjectInspector.Category.STRUCT,
+ readerInspector.getCategory());
+ assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+ + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+ + "binary,string1:string,middle:struct<list:list<struct<int1:int,"
+ + "string1:string>>>,list:list<struct<int1:int,string1:string>>,"
+ + "map:map<string,struct<int1:int,string1:string>>>",
+ readerInspector.getTypeName());
+ List<? extends StructField> fields =
+ readerInspector.getAllStructFieldRefs();
+ BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
+ getStructFieldRef("boolean1").getFieldObjectInspector();
+ ByteObjectInspector by = (ByteObjectInspector) readerInspector.
+ getStructFieldRef("byte1").getFieldObjectInspector();
+ ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
+ getStructFieldRef("short1").getFieldObjectInspector();
+ IntObjectInspector in = (IntObjectInspector) readerInspector.
+ getStructFieldRef("int1").getFieldObjectInspector();
+ LongObjectInspector lo = (LongObjectInspector) readerInspector.
+ getStructFieldRef("long1").getFieldObjectInspector();
+ FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
+ getStructFieldRef("float1").getFieldObjectInspector();
+ DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
+ getStructFieldRef("double1").getFieldObjectInspector();
+ BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
+ getStructFieldRef("bytes1").getFieldObjectInspector();
+ StringObjectInspector st = (StringObjectInspector) readerInspector.
+ getStructFieldRef("string1").getFieldObjectInspector();
+ StructObjectInspector mid = (StructObjectInspector) readerInspector.
+ getStructFieldRef("middle").getFieldObjectInspector();
+ List<? extends StructField> midFields =
+ mid.getAllStructFieldRefs();
+ ListObjectInspector midli =
+ (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
+ StructObjectInspector inner = (StructObjectInspector)
+ midli.getListElementObjectInspector();
+ List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+ ListObjectInspector li = (ListObjectInspector) readerInspector.
+ getStructFieldRef("list").getFieldObjectInspector();
+ MapObjectInspector ma = (MapObjectInspector) readerInspector.
+ getStructFieldRef("map").getFieldObjectInspector();
+ StructObjectInspector lc = (StructObjectInspector)
+ li.getListElementObjectInspector();
+ StringObjectInspector mk = (StringObjectInspector)
+ ma.getMapKeyObjectInspector();
+ StructObjectInspector mv = (StructObjectInspector)
+ ma.getMapValueObjectInspector();
+ RecordReader rows = reader.rows(null);
+ Object row = rows.next(null);
+ assertNotNull(row);
+ // check the contents of the first row
+ assertEquals(false,
+ bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals(1, by.get(readerInspector.getStructFieldData(row,
+ fields.get(1))));
+ assertEquals(1024, sh.get(readerInspector.getStructFieldData(row,
+ fields.get(2))));
+ assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+ fields.get(3))));
+ assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+ getStructFieldData(row, fields.get(4))));
+ assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row,
+ fields.get(5))), 0.00001);
+ assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row,
+ fields.get(6))), 0.00001);
+ assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(7))));
+ assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(8))));
+ List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.
+ getStructFieldData(row, fields.get(9)), midFields.get(0)));
+ assertNotNull(midRow);
+ assertEquals(2, midRow.size());
+ assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+ inFields.get(0))));
+ assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(0), inFields.get(1))));
+ assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+ inFields.get(0))));
+ assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(1), inFields.get(1))));
+ List<?> list = li.getList(readerInspector.getStructFieldData(row,
+ fields.get(10)));
+ assertEquals(2, list.size());
+ assertEquals(3, in.get(inner.getStructFieldData(list.get(0),
+ inFields.get(0))));
+ assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(0), inFields.get(1))));
+ assertEquals(4, in.get(inner.getStructFieldData(list.get(1),
+ inFields.get(0))));
+ assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(1), inFields.get(1))));
+ Map<?,?> map = ma.getMap(readerInspector.getStructFieldData(row,
+ fields.get(11)));
+ assertEquals(0, map.size());
+
+ // check the contents of second row
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(true,
+ bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals(100, by.get(readerInspector.getStructFieldData(row,
+ fields.get(1))));
+ assertEquals(2048, sh.get(readerInspector.getStructFieldData(row,
+ fields.get(2))));
+ assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+ fields.get(3))));
+ assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+ getStructFieldData(row, fields.get(4))));
+ assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row,
+ fields.get(5))), 0.00001);
+ assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row,
+ fields.get(6))), 0.00001);
+ assertEquals(bytes(), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(7))));
+ assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(8))));
+ midRow = midli.getList(mid.getStructFieldData(readerInspector.
+ getStructFieldData(row, fields.get(9)), midFields.get(0)));
+ assertNotNull(midRow);
+ assertEquals(2, midRow.size());
+ assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+ inFields.get(0))));
+ assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(0), inFields.get(1))));
+ assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+ inFields.get(0))));
+ assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(1), inFields.get(1))));
+ list = li.getList(readerInspector.getStructFieldData(row,
+ fields.get(10)));
+ assertEquals(3, list.size());
+ assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0),
+ inFields.get(0))));
+ assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(0), inFields.get(1))));
+ assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1),
+ inFields.get(0))));
+ assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(1), inFields.get(1))));
+ assertEquals(1234, in.get(inner.getStructFieldData(list.get(2),
+ inFields.get(0))));
+ assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(2), inFields.get(1))));
+ map = ma.getMap(readerInspector.getStructFieldData(row,
+ fields.get(11)));
+ assertEquals(2, map.size());
+ boolean[] found = new boolean[2];
+ for(Object key: map.keySet()) {
+ String str = mk.getPrimitiveJavaObject(key);
+ if (str.equals("chani")) {
+ assertEquals(false, found[0]);
+ assertEquals(5, in.get(inner.getStructFieldData(map.get(key),
+ inFields.get(0))));
+ assertEquals(str, st.getPrimitiveJavaObject(
+ inner.getStructFieldData(map.get(key), inFields.get(1))));
+ found[0] = true;
+ } else if (str.equals("mauddib")) {
+ assertEquals(false, found[1]);
+ assertEquals(1, in.get(inner.getStructFieldData(map.get(key),
+ inFields.get(0))));
+ assertEquals(str, st.getPrimitiveJavaObject(
+ inner.getStructFieldData(map.get(key), inFields.get(1))));
+ found[1] = true;
+ } else {
+ throw new IllegalArgumentException("Unknown key " + str);
+ }
+ }
+ assertEquals(true, found[0]);
+ assertEquals(true, found[1]);
+
+ // handle the close up
+ assertEquals(false, rows.hasNext());
+ rows.close();
+ }
+
+ @Test
+ public void columnProjection() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path p = new Path(workDir, "file.orc");
+ fs.delete(p, false);
+ ObjectInspector inspector =
+ ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 1000, CompressionKind.NONE, 100, 1000);
+ Random r1 = new Random(1);
+ Random r2 = new Random(2);
+ int x;
+ int minInt=0, maxInt=0;
+ String y;
+ String minStr = null, maxStr = null;
+ for(int i=0; i < 21000; ++i) {
+ x = r1.nextInt();
+ y = Long.toHexString(r2.nextLong());
+ if (i == 0 || x < minInt) {
+ minInt = x;
+ }
+ if (i == 0 || x > maxInt) {
+ maxInt = x;
+ }
+ if (i == 0 || y.compareTo(minStr) < 0) {
+ minStr = y;
+ }
+ if (i == 0 || y.compareTo(maxStr) > 0) {
+ maxStr = y;
+ }
+ writer.addRow(inner(x, y));
+ }
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, p);
+
+ // check out the statistics
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(3, stats.length);
+ for(ColumnStatistics s: stats) {
+ assertEquals(21000, s.getNumberOfValues());
+ if (s instanceof IntegerColumnStatistics) {
+ assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
+ assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
+ } else if (s instanceof StringColumnStatistics) {
+ assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
+ assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
+ }
+ }
+
+ // check out the types
+ List<OrcProto.Type> types = reader.getTypes();
+ assertEquals(3, types.size());
+ assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
+ assertEquals(2, types.get(0).getSubtypesCount());
+ assertEquals(1, types.get(0).getSubtypes(0));
+ assertEquals(2, types.get(0).getSubtypes(1));
+ assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
+ assertEquals(0, types.get(1).getSubtypesCount());
+ assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
+ assertEquals(0, types.get(2).getSubtypesCount());
+
+ // read the contents and make sure they match
+ RecordReader rows1 = reader.rows(new boolean[]{true, true, false});
+ RecordReader rows2 = reader.rows(new boolean[]{true, false, true});
+ r1 = new Random(1);
+ r2 = new Random(2);
+ OrcStruct row1 = null;
+ OrcStruct row2 = null;
+ for(int i = 0; i < 21000; ++i) {
+ assertEquals(true, rows1.hasNext());
+ assertEquals(true, rows2.hasNext());
+ row1 = (OrcStruct) rows1.next(row1);
+ row2 = (OrcStruct) rows2.next(row2);
+ assertEquals(r1.nextInt(), ((IntWritable) row1.getFieldValue(0)).get());
+ assertEquals(Long.toHexString(r2.nextLong()),
+ row2.getFieldValue(1).toString());
+ }
+ assertEquals(false, rows1.hasNext());
+ assertEquals(false, rows2.hasNext());
+ rows1.close();
+ rows2.close();
+ }
+
+ @Test
+ public void emptyFile() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path p = new Path(workDir, "file.orc");
+ fs.delete(p, false);
+ ObjectInspector inspector =
+ ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 1000, CompressionKind.NONE, 100, 10000);
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, p);
+ assertEquals(false, reader.rows(null).hasNext());
+ assertEquals(CompressionKind.NONE, reader.getCompression());
+ assertEquals(0, reader.getNumberOfRows());
+ assertEquals(0, reader.getCompressionSize());
+ assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+ assertEquals(3, reader.getContentLength());
+ assertEquals(false, reader.getStripes().iterator().hasNext());
+ }
+
+ @Test
+ public void metaData() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path p = new Path(workDir, "file.orc");
+ fs.delete(p, false);
+ ObjectInspector inspector =
+ ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 1000, CompressionKind.NONE, 100, 10000);
+ writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
+ writer.addUserMetadata("clobber", byteBuf(1,2,3));
+ writer.addUserMetadata("clobber", byteBuf(4,3,2,1));
+ ByteBuffer bigBuf = ByteBuffer.allocate(40000);
+ Random random = new Random(0);
+ random.nextBytes(bigBuf.array());
+ writer.addUserMetadata("big", bigBuf);
+ bigBuf.position(0);
+ writer.addRow(new BigRow(true, (byte) 127, (short) 1024, 42,
+ 42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
+ null, null, null, null));
+ writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, p);
+ assertEquals(byteBuf(5,7,11,13,17,19), reader.getMetadataValue("clobber"));
+ assertEquals(byteBuf(1,2,3,4,5,6,7,-1,-2,127,-128),
+ reader.getMetadataValue("my.meta"));
+ assertEquals(bigBuf, reader.getMetadataValue("big"));
+ try {
+ reader.getMetadataValue("unknown");
+ assertTrue(false);
+ } catch (IllegalArgumentException iae) {
+ // PASS
+ }
+ int i = 0;
+ for(String key: reader.getMetadataKeys()) {
+ if ("my.meta".equals(key) ||
+ "clobber".equals(key) ||
+ "big".equals(key)) {
+ i += 1;
+ } else {
+ throw new IllegalArgumentException("unknown key " + key);
+ }
+ }
+ assertEquals(3, i);
+ }
+
+ /**
+ * We test union and timestamp separately since we need to make the
+ * object inspector manually. (The Hive reflection-based doesn't handle
+ * them properly.)
+ */
+ @Test
+ public void testUnionAndTimestamp() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path p = new Path(workDir, "file.orc");
+ fs.delete(p, false);
+ List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+ types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).
+ addFieldNames("time").addFieldNames("union").
+ addSubtypes(1).addSubtypes(2).build());
+ types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).
+ build());
+ types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION).
+ addSubtypes(3).addSubtypes(4).build());
+ types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).
+ build());
+ types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).
+ build());
+ ObjectInspector inspector = OrcStruct.createObjectInspector(0, types);
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 1000, CompressionKind.NONE, 100, 10000);
+ OrcStruct row = new OrcStruct(2);
+ OrcUnion union = new OrcUnion();
+ row.setFieldValue(1, union);
+ row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
+ union.set((byte) 0, new IntWritable(42));
+ writer.addRow(row);
+ row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
+ union.set((byte)1, new Text("hello"));
+ writer.addRow(row);
+ row.setFieldValue(0, null);
+ row.setFieldValue(1, null);
+ writer.addRow(row);
+ row.setFieldValue(1, union);
+ union.set((byte) 0, null);
+ writer.addRow(row);
+ union.set((byte) 1, null);
+ writer.addRow(row);
+ union.set((byte) 0, new IntWritable(200000));
+ row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
+ writer.addRow(row);
+ for(int i=1900; i < 2200; ++i) {
+ row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
+ if ((i & 1) == 0) {
+ union.set((byte) 0, new IntWritable(i*i));
+ } else {
+ union.set((byte) 1, new Text(new Integer(i*i).toString()));
+ }
+ writer.addRow(row);
+ }
+ // let's add a lot of constant rows to test the rle
+ row.setFieldValue(0, null);
+ union.set((byte) 0, new IntWritable(1732050807));
+ for(int i=0; i < 1000; ++i) {
+ writer.addRow(row);
+ }
+ union.set((byte) 0, new IntWritable(0));
+ writer.addRow(row);
+ union.set((byte) 0, new IntWritable(10));
+ writer.addRow(row);
+ union.set((byte) 0, new IntWritable(138));
+ writer.addRow(row);
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, p);
+ assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+ assertEquals(1309, reader.getNumberOfRows());
+ int stripeCount = 0;
+ int rowCount = 0;
+ long currentOffset = -1;
+ for(StripeInformation stripe: reader.getStripes()) {
+ stripeCount += 1;
+ rowCount += stripe.getNumberOfRows();
+ if (currentOffset < 0) {
+ currentOffset = stripe.getOffset() + stripe.getIndexLength() +
+ stripe.getDataLength() + stripe.getFooterLength();
+ } else {
+ assertEquals(currentOffset, stripe.getOffset());
+ currentOffset += stripe.getIndexLength() +
+ stripe.getDataLength() + stripe.getFooterLength();
+ }
+ }
+ assertEquals(reader.getNumberOfRows(), rowCount);
+ assertEquals(2, stripeCount);
+ assertEquals(reader.getContentLength(), currentOffset);
+ RecordReader rows = reader.rows(null);
+ assertEquals(0, rows.getRowNumber());
+ assertEquals(0.0, rows.getProgress(), 0.000001);
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(null);
+ inspector = reader.getObjectInspector();
+ assertEquals("struct<time:timestamp,union:uniontype<int,string>>",
+ inspector.getTypeName());
+ assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
+ row.getFieldValue(0));
+ union = (OrcUnion) row.getFieldValue(1);
+ assertEquals(0, union.getTag());
+ assertEquals(new IntWritable(42), union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
+ row.getFieldValue(0));
+ assertEquals(1, union.getTag());
+ assertEquals(new Text("hello"), union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(null, row.getFieldValue(0));
+ assertEquals(null, row.getFieldValue(1));
+ row = (OrcStruct) rows.next(row);
+ assertEquals(null, row.getFieldValue(0));
+ union = (OrcUnion) row.getFieldValue(1);
+ assertEquals(0, union.getTag());
+ assertEquals(null, union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(null, row.getFieldValue(0));
+ assertEquals(1, union.getTag());
+ assertEquals(null, union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
+ row.getFieldValue(0));
+ assertEquals(new IntWritable(200000), union.getObject());
+ for(int i=1900; i < 2200; ++i) {
+ row = (OrcStruct) rows.next(row);
+ assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
+ row.getFieldValue(0));
+ if ((i & 1) == 0) {
+ assertEquals(0, union.getTag());
+ assertEquals(new IntWritable(i*i), union.getObject());
+ } else {
+ assertEquals(1, union.getTag());
+ assertEquals(new Text(new Integer(i*i).toString()), union.getObject());
+ }
+ }
+ for(int i=0; i < 1000; ++i) {
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(1732050807), union.getObject());
+ }
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(0), union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(10), union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(138), union.getObject());
+ assertEquals(false, rows.hasNext());
+ assertEquals(1.0, rows.getProgress(), 0.00001);
+ assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
+ rows.close();
+ }
+
+ /**
+ * Read and write a randomly generated snappy file.
+ * @throws Exception
+ */
+ @Test
+ public void testSnappy() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path p = new Path(workDir, "file.orc");
+ fs.delete(p, false);
+ ObjectInspector inspector =
+ ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 1000, CompressionKind.SNAPPY, 100, 10000);
+ Random rand = new Random(12);
+ for(int i=0; i < 10000; ++i) {
+ writer.addRow(new InnerStruct(rand.nextInt(),
+ Integer.toHexString(rand.nextInt())));
+ }
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, p);
+ RecordReader rows = reader.rows(null);
+ rand = new Random(12);
+ OrcStruct row = null;
+ for(int i=0; i < 10000; ++i) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(rand.nextInt(), ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(Integer.toHexString(rand.nextInt()),
+ row.getFieldValue(1).toString());
+ }
+ assertEquals(false, rows.hasNext());
+ rows.close();
+ }
+
+ /**
+ * Read and write a randomly generated snappy file.
+ * @throws Exception
+ */
+ @Test
+ public void testWithoutIndex() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path p = new Path(workDir, "file.orc");
+ fs.delete(p, false);
+ ObjectInspector inspector =
+ ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 5000, CompressionKind.SNAPPY, 1000, 0);
+ Random rand = new Random(24);
+ for(int i=0; i < 10000; ++i) {
+ InnerStruct row = new InnerStruct(rand.nextInt(),
+ Integer.toBinaryString(rand.nextInt()));
+ for(int j=0; j< 5; ++j) {
+ writer.addRow(row);
+ }
+ }
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, p);
+ assertEquals(50000, reader.getNumberOfRows());
+ assertEquals(0, reader.getRowIndexStride());
+ StripeInformation stripe = reader.getStripes().iterator().next();
+ assertEquals(true, stripe.getDataLength() != 0);
+ assertEquals(0, stripe.getIndexLength());
+ RecordReader rows = reader.rows(null);
+ rand = new Random(24);
+ OrcStruct row = null;
+ for(int i=0; i < 10000; ++i) {
+ int intVal = rand.nextInt();
+ String strVal = Integer.toBinaryString(rand.nextInt());
+ for(int j=0; j < 5; ++j) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(intVal, ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(strVal, row.getFieldValue(1).toString());
+ }
+ }
+ assertEquals(false, rows.hasNext());
+ rows.close();
+ }
+}
Index: contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java
===================================================================
--- contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java (revision 0)
+++ contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java (working copy)
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestDynamicArray {
+
+ @Test
+ public void testByteArray() throws Exception {
+ DynamicByteArray dba = new DynamicByteArray(3, 10);
+ dba.add((byte) 0);
+ dba.add((byte) 1);
+ dba.set(3, (byte) 3);
+ dba.set(2, (byte) 2);
+ dba.add((byte) 4);
+ assertEquals("{0,1,2,3,4}", dba.toString());
+ assertEquals(5, dba.size());
+ byte[] val = new byte[0];
+ assertEquals(0, dba.compare(val, 0, 0, 2, 0));
+ assertEquals(-1, dba.compare(val, 0, 0, 2, 1));
+ val = new byte[]{3,42};
+ assertEquals(1, dba.compare(val, 0, 1, 2, 0));
+ assertEquals(1, dba.compare(val, 0, 1, 2, 1));
+ assertEquals(0, dba.compare(val, 0, 1, 3, 1));
+ assertEquals(-1, dba.compare(val, 0, 1, 3, 2));
+ assertEquals(1, dba.compare(val, 0, 2, 3, 1));
+ val = new byte[256];
+ for(int b=-128; b < 128; ++b) {
+ dba.add((byte) b);
+ val[b+128] = (byte) b;
+ }
+ assertEquals(0, dba.compare(val, 0, 256, 5, 256));
+ assertEquals(1, dba.compare(val, 0, 1, 0, 1));
+ assertEquals(1, dba.compare(val, 254, 1, 0, 1));
+ assertEquals(1, dba.compare(val, 120, 1, 64, 1));
+ }
+
+ @Test
+ public void testIntArray() throws Exception {
+ DynamicIntArray dia = new DynamicIntArray(10);
+ for(int i=0; i < 10000; ++i) {
+ dia.add(2*i);
+ }
+ assertEquals(10000, dia.size());
+ for(int i=0; i < 10000; ++i) {
+ assertEquals(2*i, dia.get(i));
+ }
+ dia.clear();
+ assertEquals(0, dia.size());
+ dia.add(3);
+ dia.add(12);
+ dia.add(65);
+ assertEquals("{3,12,65}", dia.toString());
+ for(int i=0; i < 5; ++i) {
+ dia.increment(i, 3);
+ }
+ assertEquals("{6,15,68,3,3}", dia.toString());
+ }
+}
Index: contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
===================================================================
--- contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (revision 0)
+++ contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (working copy)
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+public class TestFileDump {
+
+ // End-to-end check of the FileDump command-line tool: write an ORC file,
+ // capture FileDump's stdout, and diff it against a golden file in resources.
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target/test/tmp"));
+ Path resourceDir = new Path(System.getProperty("test.resources.dir",
+ "src/test/resources"));
+
+ // Simple two-column row type; reflected into an ObjectInspector below.
+ static class MyRecord {
+ float f;
+ double d;
+ MyRecord(float f, double d) {
+ this.f = f;
+ this.d = d;
+ }
+ }
+
+ private static final String outputFilename = "/orc-file-dump.out";
+
+ // Line-by-line comparison of the expected (golden) and actual dump output;
+ // the trailing assertNull calls verify both files end at the same point.
+ private static void checkOutput(String expected,
+ String actual) throws Exception {
+ BufferedReader eStream =
+ new BufferedReader(new FileReader(expected));
+ BufferedReader aStream =
+ new BufferedReader(new FileReader(actual));
+ String line = eStream.readLine();
+ while (line != null) {
+ assertEquals(line, aStream.readLine());
+ line = eStream.readLine();
+ }
+ assertNull(eStream.readLine());
+ assertNull(aStream.readLine());
+ }
+
+ @Test
+ public void testDump() throws Exception {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path p = new Path(workDir, "file.orc");
+ // Remove any leftover file from a previous run.
+ fs.delete(p, false);
+ ObjectInspector inspector =
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRecord.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ // Fixed random seed keeps the file contents (and thus the dump) stable
+ // so they can be compared against the checked-in golden output.
+ Writer writer = OrcFile.createWriter(fs, p, inspector,
+ 100000, CompressionKind.ZLIB, 10000, 10000);
+ Random r1 = new Random(1);
+ for(int i=0; i < 21000; ++i) {
+ writer.addRow(new MyRecord(r1.nextFloat(), r1.nextDouble()));
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ // NOTE(review): myOut is never closed explicitly; output durability relies
+ // on the flush below — consider a try/finally close.
+ FileOutputStream myOut = new FileOutputStream(workDir +
+ "/orc-file-dump.out");
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{p.toString()});
+ System.out.flush();
+ System.setOut(origOut);
+
+ checkOutput(resourceDir + outputFilename, workDir + outputFilename);
+ }
+}
Index: contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java
===================================================================
--- contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java (revision 0)
+++ contrib/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java (working copy)
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestOrcStruct {
+
+ @Test
+ public void testStruct() throws Exception {
+ // Exercises OrcStruct's equals/hashCode/toString contract, including the
+ // null-field and different-arity cases.
+ OrcStruct st1 = new OrcStruct(4);
+ OrcStruct st2 = new OrcStruct(4);
+ OrcStruct st3 = new OrcStruct(3);
+ st1.setFieldValue(0, "hop");
+ st1.setFieldValue(1, "on");
+ st1.setFieldValue(2, "pop");
+ st1.setFieldValue(3, 42);
+ // equals(null) must be false, never throw.
+ assertEquals(false, st1.equals(null));
+ st2.setFieldValue(0, "hop");
+ st2.setFieldValue(1, "on");
+ st2.setFieldValue(2, "pop");
+ st2.setFieldValue(3, 42);
+ assertEquals(st1, st2);
+ st3.setFieldValue(0, "hop");
+ st3.setFieldValue(1, "on");
+ st3.setFieldValue(2, "pop");
+ // Same leading fields but different field count -> not equal.
+ assertEquals(false, st1.equals(st3));
+ // Hard-coded values pin the exact hashCode implementation; equal structs
+ // must hash equally.
+ assertEquals(11241, st1.hashCode());
+ assertEquals(st1.hashCode(), st2.hashCode());
+ assertEquals(11204, st3.hashCode());
+ assertEquals("{hop, on, pop, 42}", st1.toString());
+ // Null fields: equality must be asymmetric-safe (checked both ways) and
+ // two structs with matching nulls compare equal.
+ st1.setFieldValue(3, null);
+ assertEquals(false, st1.equals(st2));
+ assertEquals(false, st2.equals(st1));
+ st2.setFieldValue(3, null);
+ assertEquals(st1, st2);
+ }
+
+ @Test
+ public void testInspectorFromTypeInfo() throws Exception {
+ TypeInfo typeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString("struct,c11:map,c12:uniontype" +
+ ",c13:array>");
+ StructObjectInspector inspector = (StructObjectInspector)
+ OrcStruct.createObjectInspector(typeInfo);
+ assertEquals("struct,c11:map,c12:union{int},c13:list>",
+ inspector.getTypeName());
+ assertEquals(null,
+ inspector.getAllStructFieldRefs().get(0).getFieldComment());
+ assertEquals(null, inspector.getStructFieldRef("UNKNOWN"));
+ OrcStruct s1 = new OrcStruct(13);
+ for(int i=0; i < 13; ++i) {
+ s1.setFieldValue(i, i);
+ }
+
+ List