Use ConversionPatterns.mapType when converting Hive map columns to Parquet.

Summary of the change carried by this patch:
  * HiveSchemaConverter.convertMapType no longer builds the map wrapper by
    hand via listWrapper(name, OriginalType.MAP_KEY_VALUE, new GroupType(
    Repetition.REPEATED, ParquetHiveSerDe.MAP.toString(), keyType, valueType)).
    It now returns
    ConversionPatterns.mapType(Repetition.OPTIONAL, name, keyType, valueType).
  * TestHiveSchemaConverter gains testMapOriginalType, which converts a single
    column named "mapCol" and asserts the resulting shape: exactly one
    top-level field, OPTIONAL, annotated OriginalType.MAP, containing exactly
    one child field named "map" that is REPEATED and annotated
    OriginalType.MAP_KEY_VALUE.

Paths carry no a/ or b/ prefix, so apply from the repository root with a
strip level of 0 (for example: git apply -p0, or patch -p0).

NOTE(review): this patch was recovered from a copy in which all line breaks
were collapsed and text inside angle brackets was dropped. Line structure was
rebuilt from the hunk counts. The generic arguments in the new test
(map<string,string>, List<String>, List<TypeInfo>) are restored on the
assumption that they match the neighbouring testMap case - confirm against
the upstream change. Leading indentation of context lines, and the spacing
inside the two quoted schema fragments at the end of testMap, are likewise
reconstructed; if strict application fails, retry with whitespace-tolerant
matching (git apply --ignore-whitespace, or patch -l).

diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index b5e9c8b..d96d5bc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
+import parquet.schema.ConversionPatterns;
 import parquet.schema.GroupType;
 import parquet.schema.MessageType;
 import parquet.schema.OriginalType;
@@ -118,8 +119,7 @@ private static GroupType convertMapType(final String name, final MapTypeInfo typ
         typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
     final Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(),
         typeInfo.getMapValueTypeInfo());
-    return listWrapper(name, OriginalType.MAP_KEY_VALUE,
-        new GroupType(Repetition.REPEATED, ParquetHiveSerDe.MAP.toString(), keyType, valueType));
+    return ConversionPatterns.mapType(Repetition.OPTIONAL, name, keyType, valueType);
   }
 
   private static GroupType listWrapper(final String name, final OriginalType originalType,
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
index 0b25f6e..d39aa1b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
@@ -26,6 +26,8 @@
 
 import parquet.schema.MessageType;
 import parquet.schema.MessageTypeParser;
+import parquet.schema.OriginalType;
+import parquet.schema.Type.Repetition;
 
 public class TestHiveSchemaConverter {
 
@@ -111,4 +113,26 @@ public void testMap() throws Exception {
             + "  }\n"
             + "}\n");
   }
+
+  @Test
+  public void testMapOriginalType() throws Exception {
+    final String hiveColumnTypes = "map<string,string>";
+    final String hiveColumnNames = "mapCol";
+    final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
+    final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
+    final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
+    // this messageType only has one optional field, whose name is mapCol, original Type is MAP
+    assertEquals(1, messageTypeFound.getFieldCount());
+    parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
+    assertEquals("mapCol",topLevel.getName());
+    assertEquals(OriginalType.MAP, topLevel.getOriginalType());
+    assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());
+
+    assertEquals(1, topLevel.asGroupType().getFieldCount());
+    parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
+    //there is one repeated field for mapCol, the field name is "map" and its original Type is MAP_KEY_VALUE;
+    assertEquals("map", secondLevel.getName());
+    assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
+    assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
+  }
 }