diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index 40f6256..255f30c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -62,10 +62,14 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
       if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
         return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
             .named(name);
-      } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
-          typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
-          typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
+      } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
         return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
+      } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
+        return Types.primitive(PrimitiveTypeName.INT32, repetition)
+            .as(OriginalType.INT_16).named(name);
+      } else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
+        return Types.primitive(PrimitiveTypeName.INT32, repetition)
+            .as(OriginalType.INT_8).named(name);
       } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
         return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
       } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
index 589b5b5..256031e 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
@@ -20,17 +20,14 @@
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
-import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.junit.Test;
-
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
 import org.apache.parquet.schema.OriginalType;
-import org.apache.parquet.schema.Types;
-import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.Type.Repetition;
+import org.junit.Test;
 
 public class TestHiveSchemaConverter {
 
@@ -63,17 +60,38 @@ private void testConversion(final String columnNamesStr, final String columnsTyp
     final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
     final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema);
     assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound);
+
+    // Required to check the original types manually as PrimitiveType.equals does not care about it
+    List<Type> expectedFields = expectedMT.getFields();
+    List<Type> actualFields = messageTypeFound.getFields();
+    for (int i = 0, n = expectedFields.size(); i < n; ++i) {
+      OriginalType exp = expectedFields.get(i).getOriginalType();
+      OriginalType act = actualFields.get(i).getOriginalType();
+      assertEquals("Original types of the field do not match", exp, act);
+    }
   }
 
   @Test
   public void testSimpleType() throws Exception {
     testConversion(
-        "a,b,c",
-        "int,double,boolean",
+        "a,b,c,d",
+        "int,bigint,double,boolean",
"message hive_schema {\n" + " optional int32 a;\n" - + " optional double b;\n" - + " optional boolean c;\n" + + " optional int64 b;\n" + + " optional double c;\n" + + " optional boolean d;\n" + + "}\n"); + } + + @Test + public void testSpecialIntType() throws Exception { + testConversion( + "a,b", + "tinyint,smallint", + "message hive_schema {\n" + + " optional int32 a (INT_8);\n" + + " optional int32 b (INT_16);\n" + "}\n"); }