diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java index a6448b6..a2dc4e3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java @@ -46,12 +46,14 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.protobuf.CodedInputStream; +import com.google.protobuf.InvalidProtocolBufferException; public class ReaderImpl implements Reader { private static final Log LOG = LogFactory.getLog(ReaderImpl.class); private static final int DIRECTORY_SIZE_GUESS = 16 * 1024; + private static final int DEFAULT_PROTOBUF_MESSAGE_LIMIT = 64 << 20; // 64MB protected final FileSystem fileSystem; protected final Path path; @@ -450,7 +452,7 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, final CompressionCodec codec; final int bufferSize; final int metadataSize; - final OrcProto.Metadata metadata; + OrcProto.Metadata metadata = null; final OrcProto.Footer footer; final ObjectInspector inspector; @@ -468,7 +470,27 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, InputStream instream = InStream.create("metadata", Lists.newArrayList( new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize); - this.metadata = OrcProto.Metadata.parseFrom(instream); + CodedInputStream in = CodedInputStream.newInstance(instream); + int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT; + do { + try { + in.setSizeLimit(msgLimit); + this.metadata = OrcProto.Metadata.parseFrom(in); + } catch (InvalidProtocolBufferException e) { + if (e.getMessage().contains("Protocol message was too large")) { + LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" + + " size of the coded input stream." ); + msgLimit = msgLimit << 1; + // we must have failed in the middle of reading instream and instream doesn't support + // resetting the stream + instream = InStream.create("metadata", Lists.newArrayList( + new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize); + in = CodedInputStream.newInstance(instream); + } else { + throw e; + } + } + } while (metadata == null); footerBuffer.position(position + metadataSize); footerBuffer.limit(position + metadataSize + footerBufferSize);