diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index 0e60f6e..dcd46bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -32,6 +32,7 @@
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.util.StringUtils;
 
+import parquet.hadoop.api.InitContext;
 import parquet.hadoop.api.ReadSupport;
 import parquet.io.api.RecordMaterializer;
 import parquet.schema.GroupType;
@@ -190,13 +191,13 @@ private static MessageType getSchemaByIndex(MessageType schema, List col
   /**
    * It creates the readContext for Parquet side with the requested schema during the init phase.
    *
-   * @param configuration needed to get the wanted columns
-   * @param keyValueMetaData // unused
-   * @param fileSchema parquet file schema
+   * @param context the InitContext that gives access to the configuration and the Parquet file schema
    * @return the parquet ReadContext
    */
   @Override
-  public ReadContext init(final Configuration configuration, final Map<String, String> keyValueMetaData, final MessageType fileSchema) {
+  public parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
+    Configuration configuration = context.getConfiguration();
+    MessageType fileSchema = context.getFileSchema();
     String columnNames = configuration.get(IOConstants.COLUMNS);
     Map<String, String> contextMetadata = new HashMap<String, String>();
     boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index b99fd56..fe4e366 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -15,7 +15,12 @@
 import java.io.IOException;
 
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -42,6 +47,7 @@
 import parquet.hadoop.ParquetFileReader;
 import parquet.hadoop.ParquetInputFormat;
 import parquet.hadoop.ParquetInputSplit;
+import parquet.hadoop.api.InitContext;
 import parquet.hadoop.api.ReadSupport.ReadContext;
 import parquet.hadoop.metadata.BlockMetaData;
 import parquet.hadoop.metadata.FileMetaData;
@@ -243,8 +249,8 @@ protected ParquetInputSplit getSplit(
     final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
     final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
 
-    final ReadContext readContext = new DataWritableReadSupport()
-        .init(jobConf, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema());
+    final ReadContext readContext = new DataWritableReadSupport().init(new InitContext(jobConf,
+        null, fileMetaData.getSchema()));
     schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
         .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
     final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
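
For reference, below is a minimal sketch of what a ReadSupport implementation looks like against the InitContext-based entry point this patch migrates to. The class name ExampleReadSupport is hypothetical; the only API assumed beyond what the hunks above already show is the two-argument ReadContext(MessageType, Map<String, String>) constructor nested in parquet.hadoop.api.ReadSupport.

    import java.util.Collections;
    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;

    import parquet.hadoop.api.InitContext;
    import parquet.hadoop.api.ReadSupport;
    import parquet.io.api.RecordMaterializer;
    import parquet.schema.MessageType;

    // Hypothetical ReadSupport subclass, shown only to illustrate the new
    // init(InitContext) signature that DataWritableReadSupport now overrides.
    public class ExampleReadSupport extends ReadSupport<Void> {

      @Override
      public ReadContext init(InitContext context) {
        // InitContext bundles what the deprecated three-argument init() received separately.
        Configuration configuration = context.getConfiguration();
        MessageType fileSchema = context.getFileSchema();

        // A real implementation (like DataWritableReadSupport) would prune fileSchema down to
        // the columns requested in the configuration; this sketch simply asks for the full
        // file schema and attaches no read-support metadata.
        return new ReadContext(fileSchema, Collections.<String, String>emptyMap());
      }

      @Override
      public RecordMaterializer<Void> prepareForRead(Configuration configuration,
          Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
        // Record materialization is outside the scope of this sketch.
        throw new UnsupportedOperationException("illustration only");
      }
    }

The call-site change in ParquetRecordReaderWrapper follows the same pattern: instead of passing the configuration, key/value metadata, and file schema as separate arguments, they are wrapped in an InitContext, as the hunk above does with new InitContext(jobConf, null, fileMetaData.getSchema()).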