diff --git orc/src/java/org/apache/orc/impl/DataReaderProperties.java orc/src/java/org/apache/orc/impl/DataReaderProperties.java
index bb73d53..1e30ec8 100644
--- orc/src/java/org/apache/orc/impl/DataReaderProperties.java
+++ orc/src/java/org/apache/orc/impl/DataReaderProperties.java
@@ -1,6 +1,7 @@
 package org.apache.orc.impl;
 
 import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.orc.CompressionKind;
@@ -15,6 +16,7 @@
   private final boolean zeroCopy;
   private final int typeCount;
   private final int bufferSize;
+  private final FSDataInputStream fis;
 
   private DataReaderProperties(Builder builder) {
     this.fileSystem = builder.fileSystem;
@@ -23,6 +25,7 @@ private DataReaderProperties(Builder builder) {
     this.zeroCopy = builder.zeroCopy;
     this.typeCount = builder.typeCount;
     this.bufferSize = builder.bufferSize;
+    this.fis = builder.fis;
   }
 
   public FileSystem getFileSystem() {
@@ -33,6 +36,10 @@ public Path getPath() {
     return path;
   }
 
+  public FSDataInputStream getFileInputStream() {
+    return fis;
+  }
+
   public CompressionKind getCompression() {
     return compression;
   }
@@ -56,6 +63,7 @@ public static Builder builder() {
 
   public static class Builder {
     private FileSystem fileSystem;
+    private FSDataInputStream fis;
     private Path path;
     private CompressionKind compression;
     private boolean zeroCopy;
@@ -96,6 +104,11 @@ public Builder withBufferSize(int value) {
       return this;
     }
 
+    public Builder withFsDataInputStream(FSDataInputStream fis) {
+      this.fis = fis;
+      return this;
+    }
+
     public DataReaderProperties build() {
       Preconditions.checkNotNull(fileSystem);
       Preconditions.checkNotNull(path);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index b7437be..76a6433 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -90,6 +90,7 @@
   // Same for metastore cache - maintains the same background buffer, but includes postscript.
   // This will only be set if the file footer/metadata was read from disk.
   private final ByteBuffer footerMetaAndPsBuffer;
+  protected final FSDataInputStream fis;
 
   public static class StripeInformationImpl implements StripeInformation {
@@ -338,6 +339,7 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
     this.conf = options.getConfiguration();
     this.maxLength = options.getMaxLength();
+    this.fis = fs.open(path);
     FileMetadata fileMetadata = options.getFileMetadata();
     if (fileMetadata != null) {
       this.compressionKind = fileMetadata.getCompressionKind();
@@ -487,11 +489,10 @@ public static FooterInfo extractMetaInfoFromFooter(
     return ps;
   }
 
-  private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
+  private FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
                                                         Path path,
                                                         long maxFileLength
                                                         ) throws IOException {
-    FSDataInputStream file = fs.open(path);
     ByteBuffer buffer = null, fullFooterBuffer = null;
     OrcProto.PostScript ps = null;
     OrcFile.WriterVersion writerVersion = null;
@@ -508,14 +509,14 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
       int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
       buffer = ByteBuffer.allocate(readSize);
       assert buffer.position() == 0;
-      file.readFully((size - readSize),
+      this.fis.readFully((size - readSize),
           buffer.array(), buffer.arrayOffset(), readSize);
       buffer.position(0);
 
       //read the PostScript
       //get length of PostScript
       int psLen = buffer.get(readSize - 1) & 0xff;
-      ensureOrcFooter(file, path, psLen, buffer);
+      ensureOrcFooter(this.fis, path, psLen, buffer);
       int psOffset = readSize - 1 - psLen;
       ps = extractPostScript(buffer, path, psLen, psOffset);
@@ -528,7 +529,7 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
       if (extra > 0) {
         //more bytes need to be read, seek back to the right place and read extra bytes
         ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
-        file.readFully((size - readSize - extra), extraBuf.array(),
+        this.fis.readFully((size - readSize - extra), extraBuf.array(),
             extraBuf.arrayOffset() + extraBuf.position(), extra);
         extraBuf.position(extra);
         //append with already read bytes
@@ -547,11 +548,7 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
       // remember position for later TODO: what later? this comment is useless
       buffer.mark();
     } finally {
-      try {
-        file.close();
-      } catch (IOException ex) {
-        LOG.error("Failed to close the file after another error", ex);
-      }
+      // no-op
     }
 
     return new FileMetaInfo(
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 2199b11..c1bbb16 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -194,6 +194,7 @@ protected RecordReaderImpl(ReaderImpl fileReader,
           .withPath(path)
           .withTypeCount(types.size())
           .withZeroCopy(zeroCopy)
+          .withFsDataInputStream(fileReader.fis)
           .build());
     } else {
       dataReader = options.getDataReader();
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
index 4192588..c438ad9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
@@ -76,6 +76,7 @@ private DefaultDataReader(DefaultDataReader other) {
       this.path = other.path;
       this.useZeroCopy = other.useZeroCopy;
       this.codec = other.codec;
+      this.file = other.file;
     }
 
     private DefaultDataReader(DataReaderProperties properties) {
@@ -90,6 +91,7 @@ private DefaultDataReader(DataReaderProperties properties) {
       } else {
         this.pool = null;
      }
+      this.file = properties.getFileInputStream();
     }
 
     @Override
@@ -110,6 +112,7 @@ public OrcIndex readRowIndex(StripeInformation stripe, boolean[] sargColumns,
                                  OrcProto.BloomFilterIndex[] bloomFilterIndices
                                  ) throws IOException {
+      //file is already populated, so should be able to save file open call.
       if (file == null) {
         open();
       }
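
For illustration only (not part of the patch), the sketch below shows how a caller might hand an already-open stream to the data reader through the new withFsDataInputStream option, so DefaultDataReader can skip its own fs.open() call. It assumes the pre-existing withFileSystem and withCompression builder methods, and the file path, type count, and buffer size are placeholder values.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
import org.apache.orc.impl.DataReaderProperties;

public class ReuseStreamSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);            // hypothetical ORC file path
    FileSystem fs = path.getFileSystem(conf);

    // Open the file once; with this patch the same stream serves both the
    // footer read in ReaderImpl and the stripe reads in DefaultDataReader.
    FSDataInputStream fis = fs.open(path);

    DataReaderProperties props = DataReaderProperties.builder()
        .withFileSystem(fs)                   // assumed existing builder method
        .withPath(path)
        .withCompression(CompressionKind.ZLIB) // assumed existing builder method; example value
        .withZeroCopy(false)
        .withTypeCount(10)                    // placeholder
        .withBufferSize(256 * 1024)           // placeholder
        .withFsDataInputStream(fis)           // new option added by this patch
        .build();

    // DefaultDataReader picks the stream up via getFileInputStream() in its
    // constructor, so readRowIndex() finds file != null and skips open().
    System.out.println("stream reused: " + (props.getFileInputStream() == fis));
  }
}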