diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
index e9d1131..bdbd66e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
@@ -13,12 +13,11 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
-import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
 import org.apache.hadoop.io.ArrayWritable;
 
 import parquet.io.api.GroupConverter;
 import parquet.io.api.RecordMaterializer;
 import parquet.schema.GroupType;
-import parquet.schema.MessageTypeParser;
+import parquet.schema.MessageType;
 
 import java.util.Map;
@@ -31,9 +30,11 @@
 
   private final HiveStructConverter root;
 
-  public DataWritableRecordConverter(final GroupType requestedSchema, final Map<String, String> metadata) {
+  public DataWritableRecordConverter(final GroupType requestedSchema,
+      final MessageType tableSchema,
+      final Map<String, String> metadata) {
     this.root = new HiveStructConverter(requestedSchema,
-        MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)), metadata);
+        tableSchema, metadata);
   }
 
   @Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index dcd46bd..1ed9092 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -52,6 +52,7 @@
 
   public static final String HIVE_TABLE_AS_PARQUET_SCHEMA = "HIVE_TABLE_SCHEMA";
   public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access";
+  private MessageType tableSchema;
 
   /**
    * From a string which columns names (including hive column), return a list
@@ -205,7 +206,6 @@ private static MessageType getSchemaByIndex(MessageType schema, List<String> col
 
     if (columnNames != null) {
       List<String> columnNamesList = getColumnNames(columnNames);
-      MessageType tableSchema;
       if (indexAccess) {
         List<Integer> indexSequence = new ArrayList<Integer>();
 
@@ -229,6 +229,7 @@ private static MessageType getSchemaByIndex(MessageType schema, List<String> col
 
       return new ReadContext(requestedSchemaByUser, contextMetadata);
     } else {
+      tableSchema = fileSchema;
       contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, fileSchema.toString());
       return new ReadContext(fileSchema, contextMetadata);
     }
@@ -258,6 +259,11 @@ private static MessageType getSchemaByIndex(MessageType schema, List<String> col
       metadata.put(key, String.valueOf(HiveConf.getBoolVar(
         configuration, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)));
     }
-    return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata);
+    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema,
+        metadata);
+  }
+
+  public MessageType getTableSchema() {
+    return tableSchema;
   }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index fe4e366..c0a9847 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -249,10 +249,10 @@ protected ParquetInputSplit getSplit(
       final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
       final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
 
-      final ReadContext readContext = new DataWritableReadSupport().init(new InitContext(jobConf,
+      DataWritableReadSupport readSupport = new DataWritableReadSupport();
+      final ReadContext readContext = readSupport.init(new InitContext(jobConf,
           null, fileMetaData.getSchema()));
-      schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
-          .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
+      schemaSize = readSupport.getTableSchema().getFieldCount();
       final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
       final long splitStart = ((FileSplit) oldSplit).getStart();
       final long splitLength = ((FileSplit) oldSplit).getLength();
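
Note on the resulting call pattern (not part of the patch itself): the change drops the repeated
MessageTypeParser.parseMessageType() round-trip through the HIVE_TABLE_SCHEMA metadata string.
DataWritableReadSupport.init() now keeps the parsed MessageType in the new tableSchema field,
prepareForRead() hands it straight to the three-argument DataWritableRecordConverter constructor,
and ParquetRecordReaderWrapper reads it back through the new getTableSchema() accessor. The sketch
below illustrates that wiring; the class, the method name, and the jobConf/fileSchema parameters
are hypothetical stand-ins for what ParquetRecordReaderWrapper.getSplit() already has in scope.

import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter;
import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.mapred.JobConf;

import parquet.hadoop.api.InitContext;
import parquet.hadoop.api.ReadSupport.ReadContext;
import parquet.io.api.RecordMaterializer;
import parquet.schema.MessageType;

public class TableSchemaReuseSketch {

  // jobConf and fileSchema stand in for values getSplit() already has in scope.
  static RecordMaterializer<ArrayWritable> createMaterializer(JobConf jobConf,
      MessageType fileSchema) {
    DataWritableReadSupport readSupport = new DataWritableReadSupport();
    ReadContext readContext = readSupport.init(new InitContext(jobConf, null, fileSchema));

    // New accessor added by the patch: the table schema resolved during init(),
    // with no re-parse of the HIVE_TABLE_SCHEMA metadata string.
    MessageType tableSchema = readSupport.getTableSchema();
    int schemaSize = tableSchema.getFieldCount();  // what getSplit() stores in schemaSize

    // New three-argument constructor: the already-parsed schema is passed in directly.
    return new DataWritableRecordConverter(readContext.getRequestedSchema(),
        tableSchema, readContext.getReadSupportMetadata());
  }
}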