diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
index 033e26a238..6b2f1482ea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
@@ -26,6 +26,7 @@
 import org.apache.parquet.filter2.compat.FilterCompat;
 import org.apache.parquet.filter2.compat.RowGroupFilter;
 import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetInputFormat;
 import org.apache.parquet.hadoop.ParquetInputSplit;
@@ -73,10 +74,19 @@ protected ParquetInputSplit getSplit(
     if (oldSplit instanceof FileSplit) {
       final Path finalPath = ((FileSplit) oldSplit).getPath();
       jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
+      final long splitStart = ((FileSplit) oldSplit).getStart();
+      final long splitLength = ((FileSplit) oldSplit).getLength();
+
+      final ParquetMetadata parquetMetadata;
+      if (splitStart == 0 && splitLength == 0) {
+        // Can be from a dummy split for stats. Read the footer for the complete file.
+        parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
+
+      } else {
+        parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath,
+            ParquetMetadataConverter.range(splitStart, splitStart + splitLength));
+      }
 
-      // TODO enable MetadataFilter by using readFooter(Configuration configuration, Path file,
-      // MetadataFilter filter) API
-      final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
 
       final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
       final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
@@ -93,8 +103,6 @@ protected ParquetInputSplit getSplit(
       schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
           .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
       final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
-      final long splitStart = ((FileSplit) oldSplit).getStart();
-      final long splitLength = ((FileSplit) oldSplit).getLength();
       for (final BlockMetaData block : blocks) {
         final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
         if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
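
The patch replaces the unconditional full-footer read (the old TODO) with a range-filtered read, so only the metadata for row groups that overlap the split's byte range is deserialized. Below is a minimal standalone sketch (not part of the patch) showing the two ParquetFileReader.readFooter forms involved; the file path, split offsets, and class name are hypothetical, and readFooter with a MetadataFilter is the parquet-hadoop API the patched code targets (deprecated in later parquet releases).

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

public class FooterRangeFilterSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path file = new Path("/tmp/example.parquet");  // hypothetical file
    long splitStart = 0L;                          // hypothetical split offset
    long splitLength = 128L * 1024 * 1024;         // hypothetical split length

    // Full footer: metadata for every row group in the file. This is what the
    // old code always read, and what the dummy-split branch still reads.
    ParquetMetadata full =
        ParquetFileReader.readFooter(conf, file, ParquetMetadataConverter.NO_FILTER);

    // Range-filtered footer: only row groups falling inside
    // [splitStart, splitStart + splitLength) are returned.
    ParquetMetadata filtered = ParquetFileReader.readFooter(conf, file,
        ParquetMetadataConverter.range(splitStart, splitStart + splitLength));

    List<BlockMetaData> allBlocks = full.getBlocks();
    List<BlockMetaData> splitBlocks = filtered.getBlocks();
    System.out.println("row groups in file:  " + allBlocks.size());
    System.out.println("row groups in split: " + splitBlocks.size());
  }
}

The dummy-split special case (splitStart == 0 && splitLength == 0) matters because range(0, 0) is an empty interval that would match no row groups, while the stats path still needs the complete footer, hence the fall-back to an unfiltered read.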