diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
index 3261e4b..1bbdcec 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
@@ -14,6 +14,7 @@
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
 import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.parquet.io.api.GroupConverter;
 import org.apache.parquet.io.api.RecordMaterializer;
@@ -31,9 +32,10 @@
 
   private final HiveStructConverter root;
 
-  public DataWritableRecordConverter(final GroupType requestedSchema, final Map<String, String> metadata) {
+  public DataWritableRecordConverter(final GroupType requestedSchema, final Map<String, String> metadata, TypeInfo hiveTypeInfo) {
     this.root = new HiveStructConverter(requestedSchema,
-        MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)), metadata);
+        MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)),
+        metadata, hiveTypeInfo);
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 17849fa..6f98a70 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -25,6 +25,8 @@
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
@@ -49,7 +51,7 @@
   EDOUBLE_CONVERTER(Double.TYPE) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
       return new PrimitiveConverter() {
         @Override
         public void addDouble(final double value) {
@@ -60,7 +62,7 @@ public void addDouble(final double value) {
   },
   EBOOLEAN_CONVERTER(Boolean.TYPE) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
       return new PrimitiveConverter() {
         @Override
         public void addBoolean(final boolean value) {
@@ -71,29 +73,47 @@ public void addBoolean(final boolean value) {
   },
   EFLOAT_CONVERTER(Float.TYPE) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
-      return new PrimitiveConverter() {
-        @Override
-        public void addFloat(final float value) {
-          parent.set(index, new FloatWritable(value));
-        }
-      };
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
+      if (hiveTypeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
+        return new PrimitiveConverter() {
+          @Override
+          public void addFloat(final float value) {
+            parent.set(index, new DoubleWritable((double)value));
+          }
+        };
+      } else {
+        return new PrimitiveConverter() {
+          @Override
+          public void addFloat(final float value) {
+            parent.set(index, new FloatWritable(value));
+          }
+        };
+      }
     }
   },
   EINT32_CONVERTER(Integer.TYPE) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
-      return new PrimitiveConverter() {
-        @Override
-        public void addInt(final int value) {
-          parent.set(index, new IntWritable(value));
-        }
-      };
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
+      if (hiveTypeInfo.equals(TypeInfoFactory.longTypeInfo)) {
+        return new PrimitiveConverter() {
+          @Override
+          public void addInt(final int value) {
+            parent.set(index, new LongWritable((long)value));
+          }
+        };
+      } else {
+        return new PrimitiveConverter() {
+          @Override
+          public void addInt(final int value) {
+            parent.set(index, new IntWritable(value));
+          }
+        };
+      }
     }
   },
   EINT64_CONVERTER(Long.TYPE) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
       return new PrimitiveConverter() {
         @Override
         public void addLong(final long value) {
@@ -104,7 +124,7 @@ public void addLong(final long value) {
   },
   EBINARY_CONVERTER(Binary.class) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
       return new BinaryConverter<BytesWritable>(type, parent, index) {
         @Override
         protected BytesWritable convert(Binary binary) {
@@ -115,7 +135,7 @@ protected BytesWritable convert(Binary binary) {
   },
   ESTRING_CONVERTER(String.class) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
      return new BinaryConverter<Text>(type, parent, index) {
         @Override
         protected Text convert(Binary binary) {
@@ -126,7 +146,7 @@ protected Text convert(Binary binary) {
   },
   EDECIMAL_CONVERTER(BigDecimal.class) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
       return new BinaryConverter<HiveDecimalWritable>(type, parent, index) {
         @Override
         protected HiveDecimalWritable convert(Binary binary) {
@@ -137,7 +157,7 @@ protected HiveDecimalWritable convert(Binary binary) {
   },
   ETIMESTAMP_CONVERTER(TimestampWritable.class) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
       return new BinaryConverter<TimestampWritable>(type, parent, index) {
         @Override
         protected TimestampWritable convert(Binary binary) {
@@ -154,7 +174,7 @@ protected TimestampWritable convert(Binary binary) {
   },
   EDATE_CONVERTER(DateWritable.class) {
     @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) {
       return new PrimitiveConverter() {
         @Override
         public void addInt(final int value) {
@@ -174,26 +194,26 @@ private ETypeConverter(final Class<?> type) {
     return _type;
   }
 
-  abstract PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent);
+  abstract PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo);
 
   public static PrimitiveConverter getNewConverter(final PrimitiveType type, final int index,
-      final ConverterParent parent) {
+      final ConverterParent parent, TypeInfo hiveTypeInfo) {
     if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
       //TODO- cleanup once parquet support Timestamp type annotation.
-      return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
+      return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
     }
     if (OriginalType.DECIMAL == type.getOriginalType()) {
-      return EDECIMAL_CONVERTER.getConverter(type, index, parent);
+      return EDECIMAL_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
     } else if (OriginalType.UTF8 == type.getOriginalType()) {
-      return ESTRING_CONVERTER.getConverter(type, index, parent);
+      return ESTRING_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
     } else if (OriginalType.DATE == type.getOriginalType()) {
-      return EDATE_CONVERTER.getConverter(type, index, parent);
+      return EDATE_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
     }
 
     Class<?> javaType = type.getPrimitiveTypeName().javaType;
     for (final ETypeConverter eConverter : values()) {
       if (eConverter.getType() == javaType) {
-        return eConverter.getConverter(type, index, parent);
+        return eConverter.getConverter(type, index, parent, hiveTypeInfo);
       }
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java
index 06f3d32..9b8c88c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java
@@ -22,6 +22,9 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.parquet.io.api.Converter;
@@ -37,21 +40,21 @@
   public static HiveGroupConverter forMap(GroupType mapType,
                                           ConverterParent parent,
-                                          int index) {
+                                          int index, TypeInfo hiveTypeInfo) {
     return new HiveCollectionConverter(
-        mapType, parent, index, true /* its a map */ );
+        mapType, parent, index, true /* its a map */, hiveTypeInfo );
   }
 
   public static HiveGroupConverter forList(GroupType listType,
                                            ConverterParent parent,
-                                           int index) {
+                                           int index, TypeInfo hiveTypeInfo) {
     return new HiveCollectionConverter(
-        listType, parent, index, false /* not a map */);
+        listType, parent, index, false /* not a map */, hiveTypeInfo);
   }
 
   private HiveCollectionConverter(GroupType collectionType,
                                   ConverterParent parent,
-                                  int index, boolean isMap) {
+                                  int index, boolean isMap, TypeInfo hiveTypeInfo) {
     setMetadata(parent.getMetadata());
     this.collectionType = collectionType;
     this.parent = parent;
@@ -59,12 +62,12 @@ private HiveCollectionConverter(GroupType collectionType,
     Type repeatedType = collectionType.getType(0);
     if (isMap) {
       this.innerConverter = new KeyValueConverter(
-          repeatedType.asGroupType(), this);
+          repeatedType.asGroupType(), this, hiveTypeInfo);
     } else if (isElementType(repeatedType, collectionType.getName())) {
-      this.innerConverter = getConverterFromDescription(repeatedType, 0, this);
+      this.innerConverter = getConverterFromDescription(repeatedType, 0, this, ((ListTypeInfo)hiveTypeInfo).getListElementTypeInfo());
     } else {
       this.innerConverter = new ElementConverter(
-          repeatedType.asGroupType(), this);
+          repeatedType.asGroupType(), this, ((ListTypeInfo)hiveTypeInfo).getListElementTypeInfo());
     }
   }
@@ -97,13 +100,13 @@ public void set(int index, Writable value) {
     private final Converter valueConverter;
     private Writable[] keyValue = null;
 
-    public KeyValueConverter(GroupType keyValueType, HiveGroupConverter parent) {
+    public KeyValueConverter(GroupType keyValueType, HiveGroupConverter parent, TypeInfo hiveTypeInfo) {
       setMetadata(parent.getMetadata());
       this.parent = parent;
       this.keyConverter = getConverterFromDescription(
-          keyValueType.getType(0), 0, this);
+          keyValueType.getType(0), 0, this, ((MapTypeInfo) hiveTypeInfo).getMapKeyTypeInfo());
       this.valueConverter = getConverterFromDescription(
-          keyValueType.getType(1), 1, this);
+          keyValueType.getType(1), 1, this, ((MapTypeInfo) hiveTypeInfo).getMapValueTypeInfo());
     }
 
     @Override
@@ -140,11 +143,11 @@ public void end() {
     private final Converter elementConverter;
     private Writable element = null;
 
-    public ElementConverter(GroupType repeatedType, HiveGroupConverter parent) {
+    public ElementConverter(GroupType repeatedType, HiveGroupConverter parent, TypeInfo hiveTypeInfo) {
       setMetadata(parent.getMetadata());
       this.parent = parent;
       this.elementConverter = getConverterFromDescription(
-          repeatedType.getType(0), 0, this);
+          repeatedType.getType(0), 0, this, hiveTypeInfo);
     }
 
     @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
index b1ca85a..e732f4a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
@@ -13,6 +13,7 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.Writable;
 import org.apache.parquet.io.api.Converter;
 import org.apache.parquet.io.api.GroupConverter;
@@ -36,39 +37,41 @@ public void setMetadata(Map<String, String> metadata) {
     return metadata;
   }
 
-  protected static PrimitiveConverter getConverterFromDescription(PrimitiveType type, int index, ConverterParent parent) {
+  protected static PrimitiveConverter getConverterFromDescription(PrimitiveType type, int index, ConverterParent
+      parent, TypeInfo hiveTypeInfo) {
     if (type == null) {
       return null;
     }
 
-    return ETypeConverter.getNewConverter(type, index, parent);
+    return ETypeConverter.getNewConverter(type, index, parent, hiveTypeInfo);
   }
 
-  protected static HiveGroupConverter getConverterFromDescription(GroupType type, int index, ConverterParent parent) {
+  protected static HiveGroupConverter getConverterFromDescription(GroupType type, int index, ConverterParent parent,
+      TypeInfo hiveTypeInfo) {
     if (type == null) {
       return null;
     }
 
     OriginalType annotation = type.getOriginalType();
     if (annotation == OriginalType.LIST) {
-      return HiveCollectionConverter.forList(type, parent, index);
+      return HiveCollectionConverter.forList(type, parent, index, hiveTypeInfo);
    } else if (annotation == OriginalType.MAP || annotation == OriginalType.MAP_KEY_VALUE) {
-      return HiveCollectionConverter.forMap(type, parent, index);
+      return HiveCollectionConverter.forMap(type, parent, index, hiveTypeInfo);
     }
 
-    return new HiveStructConverter(type, parent, index);
+    return new HiveStructConverter(type, parent, index, hiveTypeInfo);
   }
 
-  protected static Converter getConverterFromDescription(Type type, int index, ConverterParent parent) {
+  protected static Converter getConverterFromDescription(Type type, int index, ConverterParent parent, TypeInfo hiveTypeInfo) {
     if (type == null) {
       return null;
     }
 
     if (type.isPrimitive()) {
-      return getConverterFromDescription(type.asPrimitiveType(), index, parent);
+      return getConverterFromDescription(type.asPrimitiveType(), index, parent, hiveTypeInfo);
     }
 
-    return getConverterFromDescription(type.asGroupType(), index, parent);
+    return getConverterFromDescription(type.asGroupType(), index, parent, hiveTypeInfo);
   }
 
   public abstract void set(int index, Writable value);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
index 9c35a9f..52f23c8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
@@ -17,6 +17,11 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.parquet.io.api.Converter;
@@ -38,20 +43,21 @@
   private final List<Repeated> repeatedConverters;
   private boolean reuseWritableArray = false;
 
-  public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema, Map<String, String> metadata) {
-    this(requestedSchema, null, 0, tableSchema);
+  public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema, Map<String, String>
+      metadata, TypeInfo hiveTypeInfo) {
+    this(requestedSchema, null, 0, tableSchema, hiveTypeInfo);
     setMetadata(metadata);
     this.reuseWritableArray = true;
     this.writables = new Writable[tableSchema.getFieldCount()];
   }
 
   public HiveStructConverter(final GroupType groupType, final ConverterParent parent,
-      final int index) {
-    this(groupType, parent, index, groupType);
+      final int index, TypeInfo hiveTypeInfo) {
+    this(groupType, parent, index, groupType, hiveTypeInfo);
   }
 
   public HiveStructConverter(final GroupType selectedGroupType,
-      final ConverterParent parent, final int index, final GroupType containingGroupType) {
+      final ConverterParent parent, final int index, final GroupType containingGroupType, TypeInfo hiveTypeInfo) {
     if (parent != null) {
       setMetadata(parent.getMetadata());
     }
@@ -68,7 +74,8 @@ public HiveStructConverter(final GroupType selectedGroupType,
       Type subtype = selectedFields.get(i);
       if (containingGroupType.getFields().contains(subtype)) {
         int fieldIndex = containingGroupType.getFieldIndex(subtype.getName());
-        converters[i] = getFieldConverter(subtype, fieldIndex);
+        TypeInfo hiveTI = getFieldTypeIgnoreCase(hiveTypeInfo, subtype.getName());
+        converters[i] = getFieldConverter(subtype, fieldIndex, hiveTI);
       } else {
         throw new IllegalStateException("Group type [" + containingGroupType +
             "] does not contain requested field: " + subtype);
@@ -76,20 +83,28 @@ public HiveStructConverter(final GroupType selectedGroupType,
     }
   }
 
-  private Converter getFieldConverter(Type type, int fieldIndex) {
+  private static TypeInfo getFieldTypeIgnoreCase(TypeInfo hiveTypeInfo, String fieldName) {
+    if(hiveTypeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
+      StructTypeInfo sti = (StructTypeInfo)hiveTypeInfo;
+      return sti.getStructFieldTypeInfo(fieldName);
+    }
+    throw new RuntimeException("Unknown hive type info " + hiveTypeInfo);
+  }
+
+  private Converter getFieldConverter(Type type, int fieldIndex, TypeInfo hiveTypeInfo) {
     Converter converter;
     if (type.isRepetition(Type.Repetition.REPEATED)) {
       if (type.isPrimitive()) {
         converter = new Repeated.RepeatedPrimitiveConverter(
-            type.asPrimitiveType(), this, fieldIndex);
+            type.asPrimitiveType(), this, fieldIndex, hiveTypeInfo);
       } else {
         converter = new Repeated.RepeatedGroupConverter(
-            type.asGroupType(), this, fieldIndex);
+            type.asGroupType(), this, fieldIndex, ((ListTypeInfo)hiveTypeInfo).getListElementTypeInfo());
       }
 
       repeatedConverters.add((Repeated) converter);
     } else {
-      converter = getConverterFromDescription(type, fieldIndex, this);
+      converter = getConverterFromDescription(type, fieldIndex, this, hiveTypeInfo);
     }
 
     return converter;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java
index c0af291..a7fad71 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java
@@ -23,6 +23,7 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.parquet.column.Dictionary;
@@ -65,12 +66,12 @@ public void setMetadata(Map<String, String> metadata) {
     private final int index;
     private final List<Writable> list = new ArrayList<Writable>();
 
-    public RepeatedPrimitiveConverter(PrimitiveType primitiveType, ConverterParent parent, int index) {
+    public RepeatedPrimitiveConverter(PrimitiveType primitiveType, ConverterParent parent, int index, TypeInfo hiveTypeInfo) {
       setMetadata(parent.getMetadata());
       this.primitiveType = primitiveType;
       this.parent = parent;
       this.index = index;
-      this.wrapped = HiveGroupConverter.getConverterFromDescription(primitiveType, 0, this);
+      this.wrapped = HiveGroupConverter.getConverterFromDescription(primitiveType, 0, this, hiveTypeInfo);
     }
 
     @Override
@@ -149,12 +150,12 @@ public void set(int index, Writable value) {
     private final Map<String, String> metadata = new HashMap<String, String>();
 
-    public RepeatedGroupConverter(GroupType groupType, ConverterParent parent, int index) {
+    public RepeatedGroupConverter(GroupType groupType, ConverterParent parent, int index, TypeInfo hiveTypeInfo) {
       setMetadata(parent.getMetadata());
       this.groupType = groupType;
       this.parent = parent;
       this.index = index;
-      this.wrapped = HiveGroupConverter.getConverterFromDescription(groupType, 0, this);
+      this.wrapped = HiveGroupConverter.getConverterFromDescription(groupType, 0, this, hiveTypeInfo);
     }
 
     @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index 97f228f..9476a2d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.util.StringUtils;
@@ -52,7 +53,7 @@
   public static final String HIVE_TABLE_AS_PARQUET_SCHEMA = "HIVE_TABLE_SCHEMA";
   public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access";
-
+  private TypeInfo hiveTypeInfo;
   /**
    * From a string which columns names (including hive column), return a list
    * of string columns
@@ -204,6 +205,8 @@ private static MessageType getSchemaByIndex(MessageType schema, List col
     if (columnNames != null) {
       List<String> columnNamesList = getColumnNames(columnNames);
+      String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
+      List<TypeInfo> columnTypesList = getColumnTypes(columnTypes);
 
       MessageType tableSchema;
       if (indexAccess) {
@@ -216,13 +219,12 @@ private static MessageType getSchemaByIndex(MessageType schema, List col
         tableSchema = getSchemaByIndex(fileSchema, columnNamesList, indexSequence);
       } else {
-        String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
-        List<TypeInfo> columnTypesList = getColumnTypes(columnTypes);
 
         tableSchema = getSchemaByName(fileSchema, columnNamesList, columnTypesList);
       }
 
       contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, tableSchema.toString());
+      this.hiveTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);
 
       List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);
       if (!ColumnProjectionUtils.isReadAllColumns(configuration) && !indexColumnsWanted.isEmpty()) {
@@ -262,6 +264,6 @@ private static MessageType getSchemaByIndex(MessageType schema, List col
       metadata.put(key, String.valueOf(HiveConf.getBoolVar(
         configuration, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)));
     }
-    return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata);
+    return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata, hiveTypeInfo);
   }
 }
diff --git ql/src/test/queries/clientpositive/parquet_schema_type_evolution.q ql/src/test/queries/clientpositive/parquet_schema_type_evolution.q
new file mode 100644
index 0000000..b51ca65
--- /dev/null
+++ ql/src/test/queries/clientpositive/parquet_schema_type_evolution.q
@@ -0,0 +1,29 @@
+-- Some tables might have a different schema compared to the file schema.
+-- This test verifies this case.
+
+DROP TABLE NewStructField;
+DROP TABLE NewStructFieldTable;
+
+CREATE TABLE NewStructField(a struct<a1:map<string,string>, a2:struct<e1:int>>) STORED AS PARQUET;
+
+INSERT OVERWRITE TABLE NewStructField SELECT named_struct('a1', map('k1','v1'), 'a2', named_struct('e1',5)) FROM srcpart LIMIT 5;
+
+DESCRIBE NewStructField;
+SELECT * FROM NewStructField;
+
+
+
+-- Adds new fields to the struct types
+ALTER TABLE NewStructField REPLACE COLUMNS (a struct<a1:map<string,string>, a2:struct<e1:int>, a3:int>, b
+int);
+
+DESCRIBE NewStructField;
+SELECT a.a2.e1 FROM NewStructField;
+
+-- Makes sure that new parquet tables contain the new struct field
+CREATE TABLE NewStructFieldTable STORED AS PARQUET AS SELECT * FROM NewStructField;
+DESCRIBE NewStructFieldTable;
+SELECT * FROM NewStructFieldTable;
+
+DROP TABLE NewStructField;
+DROP TABLE NewStructFieldTable;
diff --git ql/src/test/results/clientpositive/parquet_schema_type_evolution.q.out ql/src/test/results/clientpositive/parquet_schema_type_evolution.q.out
new file mode 100644
index 0000000..6594e7c
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_schema_type_evolution.q.out
@@ -0,0 +1,141 @@
+PREHOOK: query: -- Some tables might have a different schema compared to the file schema.
+-- This test verifies this case.
+
+DROP TABLE NewStructField
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Some tables might have a different schema compared to the file schema.
+-- This test verifies this case.
+
+DROP TABLE NewStructField
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE NewStructFieldTable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE NewStructFieldTable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE NewStructField(a struct<a1:map<string,string>, a2:struct<e1:int>>) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@NewStructField
+POSTHOOK: query: CREATE TABLE NewStructField(a struct<a1:map<string,string>, a2:struct<e1:int>>) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@NewStructField
+PREHOOK: query: INSERT OVERWRITE TABLE NewStructField SELECT named_struct('a1', map('k1','v1'), 'a2', named_struct('e1',5)) FROM srcpart LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@newstructfield
+POSTHOOK: query: INSERT OVERWRITE TABLE NewStructField SELECT named_struct('a1', map('k1','v1'), 'a2', named_struct('e1',5)) FROM srcpart LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@newstructfield
+POSTHOOK: Lineage: newstructfield.a EXPRESSION []
+PREHOOK: query: DESCRIBE NewStructField
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@newstructfield
+POSTHOOK: query: DESCRIBE NewStructField
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@newstructfield
+a	struct<a1:map<string,string>,a2:struct<e1:int>>
+PREHOOK: query: SELECT * FROM NewStructField
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newstructfield
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM NewStructField
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newstructfield
+#### A masked pattern was here ####
+{"a1":{"k1":"v1"},"a2":{"e1":5}}
+{"a1":{"k1":"v1"},"a2":{"e1":5}}
+{"a1":{"k1":"v1"},"a2":{"e1":5}} +{"a1":{"k1":"v1"},"a2":{"e1":5}} +{"a1":{"k1":"v1"},"a2":{"e1":5}} +PREHOOK: query: -- Adds new fields to the struct types +ALTER TABLE NewStructField REPLACE COLUMNS (a struct, a2:struct, a3:int>, b +int) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@newstructfield +PREHOOK: Output: default@newstructfield +POSTHOOK: query: -- Adds new fields to the struct types +ALTER TABLE NewStructField REPLACE COLUMNS (a struct, a2:struct, a3:int>, b +int) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@newstructfield +POSTHOOK: Output: default@newstructfield +PREHOOK: query: DESCRIBE NewStructField +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@newstructfield +POSTHOOK: query: DESCRIBE NewStructField +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@newstructfield +a struct,a2:struct,a3:int> +b int +PREHOOK: query: SELECT a.a2.e1 FROM NewStructField +PREHOOK: type: QUERY +PREHOOK: Input: default@newstructfield +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.a2.e1 FROM NewStructField +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newstructfield +#### A masked pattern was here #### +5 +5 +5 +5 +5 +PREHOOK: query: -- Makes sure that new parquet tables contain the new struct field +CREATE TABLE NewStructFieldTable STORED AS PARQUET AS SELECT * FROM NewStructField +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@newstructfield +PREHOOK: Output: database:default +PREHOOK: Output: default@NewStructFieldTable +POSTHOOK: query: -- Makes sure that new parquet tables contain the new struct field +CREATE TABLE NewStructFieldTable STORED AS PARQUET AS SELECT * FROM NewStructField +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@newstructfield +POSTHOOK: Output: database:default +POSTHOOK: Output: default@NewStructFieldTable +PREHOOK: query: DESCRIBE NewStructFieldTable +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@newstructfieldtable +POSTHOOK: query: DESCRIBE NewStructFieldTable +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@newstructfieldtable +a struct,a2:struct,a3:int> +b int +PREHOOK: query: SELECT * FROM NewStructFieldTable +PREHOOK: type: QUERY +PREHOOK: Input: default@newstructfieldtable +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM NewStructFieldTable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newstructfieldtable +#### A masked pattern was here #### +{"a1":{"k1":"v1"},"a2":{"e1":5},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5},"a3":null} NULL +PREHOOK: query: DROP TABLE NewStructField +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@newstructfield +PREHOOK: Output: default@newstructfield +POSTHOOK: query: DROP TABLE NewStructField +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@newstructfield +POSTHOOK: Output: default@newstructfield +PREHOOK: query: DROP TABLE NewStructFieldTable +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@newstructfieldtable +PREHOOK: Output: default@newstructfieldtable +POSTHOOK: query: DROP TABLE NewStructFieldTable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@newstructfieldtable +POSTHOOK: Output: default@newstructfieldtable