diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e760ed5..ef5860a 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1054,6 +1054,11 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "data is read remotely (from the client or HS2 machine) and sent to all the tasks."), HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000, "Max cache size for keeping meta info about orc splits cached in the client."), + HIVE_ORC_CACHE_USE_SOFT_REFERENCES("hive.orc.cache.use.soft.references", false, + "By default, the cache that ORC input format uses to store orc file footer use hard\n" + + "references for the cached object. Setting this to true can help avoid out of memory\n" + + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + + "overall query performance."), HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10, "How many threads orc should use to create splits in parallel."), HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false, diff --git a/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java b/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java index a0336d2..57a930f 100644 --- a/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java +++ b/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java @@ -18940,6 +18940,875 @@ public Builder setMagicBytes( // @@protoc_insertion_point(class_scope:orc.proto.PostScript) } + public interface FileTailOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional .orc.proto.PostScript postscript = 1; + /** + * optional .orc.proto.PostScript postscript = 1; + */ + boolean hasPostscript(); + /** + * optional .orc.proto.PostScript postscript = 1; + */ + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript getPostscript(); + /** + * optional .orc.proto.PostScript postscript = 1; + */ + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder(); + + // optional .orc.proto.Footer footer = 2; + /** + * optional .orc.proto.Footer footer = 2; + */ + boolean hasFooter(); + /** + * optional .orc.proto.Footer footer = 2; + */ + org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer getFooter(); + /** + * optional .orc.proto.Footer footer = 2; + */ + org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder getFooterOrBuilder(); + + // optional uint64 fileLength = 3; + /** + * optional uint64 fileLength = 3; + */ + boolean hasFileLength(); + /** + * optional uint64 fileLength = 3; + */ + long getFileLength(); + + // optional uint64 postscriptLength = 4; + /** + * optional uint64 postscriptLength = 4; + */ + boolean hasPostscriptLength(); + /** + * optional uint64 postscriptLength = 4; + */ + long getPostscriptLength(); + } + /** + * Protobuf type {@code orc.proto.FileTail} + * + *
+   * The contents of the file tail that must be serialized.
+   * 
+ */ + public static final class FileTail extends + com.google.protobuf.GeneratedMessage + implements FileTailOrBuilder { + // Use FileTail.newBuilder() to construct. + private FileTail(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private FileTail(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final FileTail defaultInstance; + public static FileTail getDefaultInstance() { + return defaultInstance; + } + + public FileTail getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private FileTail( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder subBuilder = null; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + subBuilder = postscript_.toBuilder(); + } + postscript_ = input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.PARSER, extensionRegistry); + if (subBuilder != null) { + subBuilder.mergeFrom(postscript_); + postscript_ = subBuilder.buildPartial(); + } + bitField0_ |= 0x00000001; + break; + } + case 18: { + org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder subBuilder = null; + if (((bitField0_ & 0x00000002) == 0x00000002)) { + subBuilder = footer_.toBuilder(); + } + footer_ = input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.PARSER, extensionRegistry); + if (subBuilder != null) { + subBuilder.mergeFrom(footer_); + footer_ = subBuilder.buildPartial(); + } + bitField0_ |= 0x00000002; + break; + } + case 24: { + bitField0_ |= 0x00000004; + fileLength_ = input.readUInt64(); + break; + } + case 32: { + bitField0_ |= 0x00000008; + postscriptLength_ = input.readUInt64(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public 
FileTail parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new FileTail(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // optional .orc.proto.PostScript postscript = 1; + public static final int POSTSCRIPT_FIELD_NUMBER = 1; + private org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript postscript_; + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public boolean hasPostscript() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript getPostscript() { + return postscript_; + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder() { + return postscript_; + } + + // optional .orc.proto.Footer footer = 2; + public static final int FOOTER_FIELD_NUMBER = 2; + private org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer footer_; + /** + * optional .orc.proto.Footer footer = 2; + */ + public boolean hasFooter() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer getFooter() { + return footer_; + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder getFooterOrBuilder() { + return footer_; + } + + // optional uint64 fileLength = 3; + public static final int FILELENGTH_FIELD_NUMBER = 3; + private long fileLength_; + /** + * optional uint64 fileLength = 3; + */ + public boolean hasFileLength() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * optional uint64 fileLength = 3; + */ + public long getFileLength() { + return fileLength_; + } + + // optional uint64 postscriptLength = 4; + public static final int POSTSCRIPTLENGTH_FIELD_NUMBER = 4; + private long postscriptLength_; + /** + * optional uint64 postscriptLength = 4; + */ + public boolean hasPostscriptLength() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + /** + * optional uint64 postscriptLength = 4; + */ + public long getPostscriptLength() { + return postscriptLength_; + } + + private void initFields() { + postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance(); + footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance(); + fileLength_ = 0L; + postscriptLength_ = 0L; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeMessage(1, postscript_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeMessage(2, footer_); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeUInt64(3, fileLength_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + output.writeUInt64(4, postscriptLength_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + 
public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(1, postscript_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(2, footer_); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeUInt64Size(3, fileLength_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + size += com.google.protobuf.CodedOutputStream + .computeUInt64Size(4, postscriptLength_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail prototype) { 
+ return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code orc.proto.FileTail} + * + *
+     * The contents of the file tail that must be serialized.
+     * 
+ */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTailOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.Builder.class); + } + + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + getPostscriptFieldBuilder(); + getFooterFieldBuilder(); + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + if (postscriptBuilder_ == null) { + postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance(); + } else { + postscriptBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000001); + if (footerBuilder_ == null) { + footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance(); + } else { + footerBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000002); + fileLength_ = 0L; + bitField0_ = (bitField0_ & ~0x00000004); + postscriptLength_ = 0L; + bitField0_ = (bitField0_ & ~0x00000008); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor; + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail getDefaultInstanceForType() { + return org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.getDefaultInstance(); + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail build() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail buildPartial() { + org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + if (postscriptBuilder_ == null) { + result.postscript_ = postscript_; + } else { + result.postscript_ = postscriptBuilder_.build(); + } + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + if (footerBuilder_ == null) { + result.footer_ = footer_; + } else { + result.footer_ = footerBuilder_.build(); + } + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.fileLength_ = fileLength_; + if (((from_bitField0_ & 0x00000008) == 0x00000008)) { + to_bitField0_ |= 0x00000008; + } + result.postscriptLength_ = postscriptLength_; 
+ result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail) { + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail other) { + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.getDefaultInstance()) return this; + if (other.hasPostscript()) { + mergePostscript(other.getPostscript()); + } + if (other.hasFooter()) { + mergeFooter(other.getFooter()); + } + if (other.hasFileLength()) { + setFileLength(other.getFileLength()); + } + if (other.hasPostscriptLength()) { + setPostscriptLength(other.getPostscriptLength()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // optional .orc.proto.PostScript postscript = 1; + private org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance(); + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder> postscriptBuilder_; + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public boolean hasPostscript() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript getPostscript() { + if (postscriptBuilder_ == null) { + return postscript_; + } else { + return postscriptBuilder_.getMessage(); + } + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public Builder setPostscript(org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript value) { + if (postscriptBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + postscript_ = value; + onChanged(); + } else { + postscriptBuilder_.setMessage(value); + } + bitField0_ |= 0x00000001; + return this; + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public Builder setPostscript( + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder builderForValue) { + if (postscriptBuilder_ == null) { + postscript_ = builderForValue.build(); + onChanged(); + } else { + postscriptBuilder_.setMessage(builderForValue.build()); + } + bitField0_ |= 0x00000001; + return this; + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public Builder mergePostscript(org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript value) { + if (postscriptBuilder_ == null) { + if (((bitField0_ & 0x00000001) == 0x00000001) && + postscript_ != 
org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance()) { + postscript_ = + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.newBuilder(postscript_).mergeFrom(value).buildPartial(); + } else { + postscript_ = value; + } + onChanged(); + } else { + postscriptBuilder_.mergeFrom(value); + } + bitField0_ |= 0x00000001; + return this; + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public Builder clearPostscript() { + if (postscriptBuilder_ == null) { + postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance(); + onChanged(); + } else { + postscriptBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder getPostscriptBuilder() { + bitField0_ |= 0x00000001; + onChanged(); + return getPostscriptFieldBuilder().getBuilder(); + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder() { + if (postscriptBuilder_ != null) { + return postscriptBuilder_.getMessageOrBuilder(); + } else { + return postscript_; + } + } + /** + * optional .orc.proto.PostScript postscript = 1; + */ + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder> + getPostscriptFieldBuilder() { + if (postscriptBuilder_ == null) { + postscriptBuilder_ = new com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder>( + postscript_, + getParentForChildren(), + isClean()); + postscript_ = null; + } + return postscriptBuilder_; + } + + // optional .orc.proto.Footer footer = 2; + private org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance(); + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer, org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder> footerBuilder_; + /** + * optional .orc.proto.Footer footer = 2; + */ + public boolean hasFooter() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer getFooter() { + if (footerBuilder_ == null) { + return footer_; + } else { + return footerBuilder_.getMessage(); + } + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public Builder setFooter(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer value) { + if (footerBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + footer_ = value; + onChanged(); + } else { + footerBuilder_.setMessage(value); + } + bitField0_ |= 0x00000002; + return this; + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public Builder setFooter( + org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder builderForValue) { + if (footerBuilder_ == null) { + footer_ = builderForValue.build(); + onChanged(); + } else { + footerBuilder_.setMessage(builderForValue.build()); + } + bitField0_ |= 0x00000002; + return this; + } + /** + * optional .orc.proto.Footer footer = 2; + */ + 
public Builder mergeFooter(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer value) { + if (footerBuilder_ == null) { + if (((bitField0_ & 0x00000002) == 0x00000002) && + footer_ != org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance()) { + footer_ = + org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.newBuilder(footer_).mergeFrom(value).buildPartial(); + } else { + footer_ = value; + } + onChanged(); + } else { + footerBuilder_.mergeFrom(value); + } + bitField0_ |= 0x00000002; + return this; + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public Builder clearFooter() { + if (footerBuilder_ == null) { + footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance(); + onChanged(); + } else { + footerBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000002); + return this; + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder getFooterBuilder() { + bitField0_ |= 0x00000002; + onChanged(); + return getFooterFieldBuilder().getBuilder(); + } + /** + * optional .orc.proto.Footer footer = 2; + */ + public org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder getFooterOrBuilder() { + if (footerBuilder_ != null) { + return footerBuilder_.getMessageOrBuilder(); + } else { + return footer_; + } + } + /** + * optional .orc.proto.Footer footer = 2; + */ + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer, org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder> + getFooterFieldBuilder() { + if (footerBuilder_ == null) { + footerBuilder_ = new com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer, org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder>( + footer_, + getParentForChildren(), + isClean()); + footer_ = null; + } + return footerBuilder_; + } + + // optional uint64 fileLength = 3; + private long fileLength_ ; + /** + * optional uint64 fileLength = 3; + */ + public boolean hasFileLength() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * optional uint64 fileLength = 3; + */ + public long getFileLength() { + return fileLength_; + } + /** + * optional uint64 fileLength = 3; + */ + public Builder setFileLength(long value) { + bitField0_ |= 0x00000004; + fileLength_ = value; + onChanged(); + return this; + } + /** + * optional uint64 fileLength = 3; + */ + public Builder clearFileLength() { + bitField0_ = (bitField0_ & ~0x00000004); + fileLength_ = 0L; + onChanged(); + return this; + } + + // optional uint64 postscriptLength = 4; + private long postscriptLength_ ; + /** + * optional uint64 postscriptLength = 4; + */ + public boolean hasPostscriptLength() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + /** + * optional uint64 postscriptLength = 4; + */ + public long getPostscriptLength() { + return postscriptLength_; + } + /** + * optional uint64 postscriptLength = 4; + */ + public Builder setPostscriptLength(long value) { + bitField0_ |= 0x00000008; + postscriptLength_ = value; + onChanged(); + return this; + } + /** + * optional uint64 postscriptLength = 4; + */ + public Builder clearPostscriptLength() { + bitField0_ = (bitField0_ & ~0x00000008); + postscriptLength_ = 0L; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:orc.proto.FileTail) + } + + static { + defaultInstance = new FileTail(true); + 
defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:orc.proto.FileTail) + } + private static com.google.protobuf.Descriptors.Descriptor internal_static_orc_proto_IntegerStatistics_descriptor; private static @@ -19055,6 +19924,11 @@ public Builder setMagicBytes( private static com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_orc_proto_PostScript_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_orc_proto_FileTail_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_orc_proto_FileTail_fieldAccessorTable; public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { @@ -19135,10 +20009,13 @@ public Builder setMagicBytes( "ression\030\002 \001(\0162\032.orc.proto.CompressionKin" + "d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi", "on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" + - "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" + - "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" + - "APPY\020\002\022\007\n\003LZO\020\003B\"\n org.apache.hadoop.hiv" + - "e.ql.io.orc" + "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t\"\206\001\n\010" + + "FileTail\022)\n\npostscript\030\001 \001(\0132\025.orc.proto" + + ".PostScript\022!\n\006footer\030\002 \001(\0132\021.orc.proto." + + "Footer\022\022\n\nfileLength\030\003 \001(\004\022\030\n\020postscript" + + "Length\030\004 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020" + + "\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\"\n org." + + "apache.hadoop.hive.ql.io.orc" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -19283,6 +20160,12 @@ public Builder setMagicBytes( com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_orc_proto_PostScript_descriptor, new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "MetadataLength", "WriterVersion", "Magic", }); + internal_static_orc_proto_FileTail_descriptor = + getDescriptor().getMessageTypes().get(23); + internal_static_orc_proto_FileTail_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_orc_proto_FileTail_descriptor, + new java.lang.String[] { "Postscript", "Footer", "FileLength", "PostscriptLength", }); return null; } }; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index 3ca0a6e..906eb6b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -194,14 +194,14 @@ public static Reader createReader(FileSystem fs, Path path public static class ReaderOptions { private final Configuration conf; private FileSystem filesystem; - private ReaderImpl.FileMetaInfo fileMetaInfo; private long maxLength = Long.MAX_VALUE; + private OrcTail orcTail; public ReaderOptions(Configuration conf) { this.conf = conf; } - ReaderOptions fileMetaInfo(ReaderImpl.FileMetaInfo info) { - fileMetaInfo = info; + ReaderOptions orcTail(OrcTail orcTail) { + this.orcTail = orcTail; return this; } @@ -223,8 +223,8 @@ FileSystem getFilesystem() { return filesystem; } - ReaderImpl.FileMetaInfo 
getFileMetaInfo() { - return fileMetaInfo; + OrcTail getOrcTail() { + return orcTail; } long getMaxLength() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 64abe91..35469d1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -78,10 +78,12 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.StringUtils; +import com.google.common.annotations.VisibleForTesting; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.Lists; import com.google.common.util.concurrent.ThreadFactoryBuilder; + /** * A MapReduce/Hive input format for ORC files. *

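
A minimal sketch (not part of this patch) of how the generated orc.proto.FileTail API added earlier in this diff is used: the message carries the postscript (field 1), footer (field 2), fileLength (field 3) and postscriptLength (field 4). The builder/toByteArray/parseFrom calls come from the generated code above; the class name and literal values below are placeholders for illustration only.

```java
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class FileTailRoundTrip {
  public static void main(String[] args) throws Exception {
    // Build a FileTail the way the new split serialization does (placeholder values).
    OrcProto.FileTail tail = OrcProto.FileTail.newBuilder()
        .setPostscript(OrcProto.PostScript.getDefaultInstance()) // field 1
        .setFooter(OrcProto.Footer.getDefaultInstance())         // field 2
        .setFileLength(1024L)                                    // field 3: uint64
        .setPostscriptLength(23L)                                // field 4: uint64
        .build();
    byte[] bytes = tail.toByteArray();   // OrcSplit/OrcNewSplit write these bytes after a VInt length
    OrcProto.FileTail parsed = OrcProto.FileTail.parseFrom(bytes);
    System.out.println(parsed.getFileLength()); // 1024
  }
}
```
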
@@ -127,6 +129,7 @@ private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; private static final int DEFAULT_ETL_FILE_THRESHOLD = 100; + private static final int DEFAULT_CACHE_INITIAL_CAPACITY = 1024; private static final PerfLogger perfLogger = PerfLogger.getPerfLogger(); private static final String CLASS_NAME = ReaderImpl.class.getName(); @@ -430,7 +433,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, */ static class Context { private final Configuration conf; - private static Cache footerCache; + private static Cache footerCache; private static ExecutorService threadPool = null; private final int numBuckets; private final long maxSize; @@ -467,6 +470,8 @@ public boolean validateInput(FileSystem fs, HiveConf conf, ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE); int numThreads = HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS); + boolean useSoftReference = HiveConf.getBoolVar(conf, + ConfVars.HIVE_ORC_CACHE_USE_SOFT_REFERENCES); cacheStripeDetails = (cacheStripeDetailsSize > 0); @@ -480,12 +485,14 @@ public boolean validateInput(FileSystem fs, HiveConf conf, } if (footerCache == null && cacheStripeDetails) { - footerCache = CacheBuilder.newBuilder() + CacheBuilder builder = CacheBuilder.newBuilder() + .initialCapacity(DEFAULT_CACHE_INITIAL_CAPACITY) .concurrencyLevel(numThreads) - .initialCapacity(cacheStripeDetailsSize) - .maximumSize(cacheStripeDetailsSize) - .softValues() - .build(); + .maximumSize(cacheStripeDetailsSize); + if (useSoftReference) { + builder = builder.softValues(); + } + footerCache = builder.build(); } } String value = conf.get(ValidTxnList.VALID_TXNS_KEY, @@ -502,13 +509,13 @@ public boolean validateInput(FileSystem fs, HiveConf conf, private final Context context; private final FileSystem fs; private final FileStatus file; - private final FileInfo fileInfo; + private final OrcTail orcTail; private final boolean isOriginal; private final List deltas; private final boolean hasBase; SplitInfo(Context context, FileSystem fs, - FileStatus file, FileInfo fileInfo, + FileStatus file, OrcTail orcTail, boolean isOriginal, List deltas, boolean hasBase, Path dir, boolean[] covered) throws IOException { @@ -516,7 +523,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, this.context = context; this.fs = fs; this.file = file; - this.fileInfo = fileInfo; + this.orcTail = orcTail; this.isOriginal = isOriginal; this.deltas = deltas; this.hasBase = hasBase; @@ -547,26 +554,26 @@ public ETLSplitStrategy(Context context, FileSystem fs, Path dir, List getSplits() throws IOException { List result = Lists.newArrayList(); for (FileStatus file : files) { - FileInfo info = null; + OrcTail orcTail = null; if (context.cacheStripeDetails) { - info = verifyCachedFileInfo(file); + orcTail = verifyCachedOrcTail(file); } // ignore files of 0 length if (file.getLen() > 0) { - result.add(new SplitInfo(context, fs, file, info, isOriginal, deltas, true, dir, covered)); + result.add(new SplitInfo(context, fs, file, orcTail, isOriginal, deltas, true, dir, covered)); } } return result; @@ -630,7 +637,7 @@ public BISplitStrategy(Context context, FileSystem fs, for (Map.Entry entry : blockOffsets.entrySet()) { OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), entry.getKey(), entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true, - deltas, -1); + deltas, -1, fileStatus.getLen()); splits.add(orcSplit); } } @@ -671,7 +678,7 @@ public 
ACIDSplitStrategy(Path dir, int numBuckets, List deltas, b if (!deltas.isEmpty()) { for (int b = 0; b < numBuckets; ++b) { if (!covered[b]) { - splits.add(new OrcSplit(dir, b, 0, new String[0], null, false, false, deltas, -1)); + splits.add(new OrcSplit(dir, b, 0, new String[0], null, false, false, deltas, -1, -1)); } } } @@ -775,12 +782,11 @@ public SplitStrategy call() throws IOException { private final Context context; private final FileSystem fs; private final FileStatus file; + private OrcTail orcTail; private final long blockSize; private final TreeMap locations; - private final FileInfo fileInfo; private List stripes; - private ReaderImpl.FileMetaInfo fileMetaInfo; - private Metadata metadata; + private List stripeStats; private List types; private boolean[] includedCols; private final boolean isOriginal; @@ -795,7 +801,7 @@ public SplitGenerator(SplitInfo splitInfo) throws IOException { this.fs = splitInfo.fs; this.file = splitInfo.file; this.blockSize = file.getBlockSize(); - this.fileInfo = splitInfo.fileInfo; + this.orcTail = splitInfo.orcTail; locations = SHIMS.getLocationsWithOffset(fs, file); this.isOriginal = splitInfo.isOriginal; this.deltas = splitInfo.deltas; @@ -839,11 +845,11 @@ static long getOverlap(long offset1, long length1, * are written with large block sizes. * @param offset the start of the split * @param length the length of the split - * @param fileMetaInfo file metadata from footer and postscript + * @param orcTail orc file tail * @throws IOException */ OrcSplit createSplit(long offset, long length, - ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException { + OrcTail orcTail) throws IOException { String[] hosts; Map.Entry startEntry = locations.floorEntry(offset); BlockLocation start = startEntry.getValue(); @@ -902,8 +908,8 @@ OrcSplit createSplit(long offset, long length, final double splitRatio = (double) length / (double) fileLen; final long scaledProjSize = projColsUncompressedSize > 0 ? (long) (splitRatio * projColsUncompressedSize) : fileLen; - return new OrcSplit(file.getPath(), offset, length, hosts, fileMetaInfo, - isOriginal, hasBase, deltas, scaledProjSize); + return new OrcSplit(file.getPath(), offset, length, hosts, orcTail, + isOriginal, hasBase, deltas, scaledProjSize, fileLen); } /** @@ -929,7 +935,6 @@ OrcSplit createSplit(long offset, long length, writerVersion != OrcFile.WriterVersion.ORIGINAL) { SearchArgument sarg = options.getSearchArgument(); List sargLeaves = sarg.getLeaves(); - List stripeStats = metadata.getStripeStatistics(); int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves, options.getColumnNames(), getRootColumn(isOriginal)); @@ -965,7 +970,7 @@ OrcSplit createSplit(long offset, long length, if (!includeStripe[idx]) { // create split for the previous unfinished stripe if (currentOffset != -1) { - splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); + splits.add(createSplit(currentOffset, currentLength, orcTail)); currentOffset = -1; } continue; @@ -975,7 +980,7 @@ OrcSplit createSplit(long offset, long length, // crossed a block boundary, cut the input split here. if (currentOffset != -1 && currentLength > context.minSize && (currentOffset / blockSize != stripe.getOffset() / blockSize)) { - splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); + splits.add(createSplit(currentOffset, currentLength, orcTail)); currentOffset = -1; } // if we aren't building a split, start a new one. 
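
The footer-cache change in OrcInputFormat.Context above only toggles Guava's softValues() based on the new hive.orc.cache.use.soft.references setting; initial capacity, concurrency level and maximum size are configured as before. A minimal sketch, assuming a hypothetical helper name (the patch builds the cache inline); everything else mirrors the patch.

```java
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcTail;

public class FooterCacheSketch {
  // Hypothetical helper; OrcInputFormat.Context does this inline in its constructor.
  static Cache<Path, OrcTail> buildFooterCache(int numThreads,
                                               int cacheStripeDetailsSize,
                                               boolean useSoftReference) {
    CacheBuilder<Object, Object> builder = CacheBuilder.newBuilder()
        .initialCapacity(1024)                 // DEFAULT_CACHE_INITIAL_CAPACITY
        .concurrencyLevel(numThreads)          // hive.orc.compute.splits.num.threads
        .maximumSize(cacheStripeDetailsSize);  // hive.orc.cache.stripe.details.size
    if (useSoftReference) {
      // hive.orc.cache.use.soft.references: let the GC drop cached tails under memory pressure
      builder = builder.softValues();
    }
    return builder.build();
  }
}
```
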
@@ -987,12 +992,12 @@ OrcSplit createSplit(long offset, long length, (stripe.getOffset() + stripe.getLength()) - currentOffset; } if (currentLength >= context.maxSize) { - splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); + splits.add(createSplit(currentOffset, currentLength, orcTail)); currentOffset = -1; } } if (currentOffset != -1) { - splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); + splits.add(createSplit(currentOffset, currentLength, orcTail)); } // add uncovered ACID delta splits @@ -1001,41 +1006,34 @@ OrcSplit createSplit(long offset, long length, } private void populateAndCacheStripeDetails() throws IOException { - Reader orcReader = OrcFile.createReader(file.getPath(), - OrcFile.readerOptions(context.conf).filesystem(fs).maxLength(file.getLen())); - if (fileInfo != null) { - stripes = fileInfo.stripeInfos; - fileMetaInfo = fileInfo.fileMetaInfo; - metadata = fileInfo.metadata; - types = fileInfo.types; - writerVersion = fileInfo.writerVersion; - // For multiple runs, in case sendSplitsInFooter changes - if (fileMetaInfo == null && context.footerInSplits) { - fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo(); - fileInfo.metadata = orcReader.getMetadata(); - fileInfo.types = orcReader.getTypes(); - fileInfo.writerVersion = orcReader.getWriterVersion(); - } - } else { - stripes = orcReader.getStripes(); - metadata = orcReader.getMetadata(); - types = orcReader.getTypes(); - writerVersion = orcReader.getWriterVersion(); - fileMetaInfo = context.footerInSplits ? - ((ReaderImpl) orcReader).getFileMetaInfo() : null; + if (orcTail == null) { + Reader orcReader = OrcFile.createReader(file.getPath(), + OrcFile.readerOptions(context.conf) + .filesystem(fs) + .maxLength(file.getLen())); + orcTail = new OrcTail(orcReader.getFileTail(), orcReader.getSerializedFileFooter(), file.getModificationTime()); if (context.cacheStripeDetails) { - // Populate into cache. - Context.footerCache.put(file.getPath(), - new FileInfo(file.getModificationTime(), file.getLen(), stripes, - metadata, types, fileMetaInfo, writerVersion)); + context.footerCache.put(file.getPath(), orcTail); } } + + stripes = orcTail.getStripes(); + stripeStats = orcTail.getStripeStatistics(); + types = orcTail.getTypes(); + writerVersion = orcTail.getWriterVersion(); includedCols = genIncludedColumns(types, context.conf, isOriginal); - projColsUncompressedSize = computeProjectionSize(orcReader, includedCols, isOriginal); + List fileColStats = orcTail.getFooter().getStatisticsList(); + projColsUncompressedSize = computeProjectionSize(types, fileColStats, includedCols, + isOriginal); + if (!context.footerInSplits) { + orcTail = null; + } } - private long computeProjectionSize(final Reader orcReader, final boolean[] includedCols, - final boolean isOriginal) { + private long computeProjectionSize(List types, + List stats, + boolean[] includedCols, + boolean isOriginal) { final int rootIdx = getRootColumn(isOriginal); List internalColIds = Lists.newArrayList(); if (includedCols != null) { @@ -1045,7 +1043,7 @@ private long computeProjectionSize(final Reader orcReader, final boolean[] inclu } } } - return orcReader.getRawDataSizeFromColIndices(internalColIds); + return ReaderImpl.getRawDataSizeFromColIndices(internalColIds, types, stats); } private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, @@ -1149,37 +1147,6 @@ private static void cancelFutures(List> futures) { return result.toArray(new InputSplit[result.size()]); } - /** - * FileInfo. 
- * - * Stores information relevant to split generation for an ORC File. - * - */ - private static class FileInfo { - long modificationTime; - long size; - List stripeInfos; - ReaderImpl.FileMetaInfo fileMetaInfo; - Metadata metadata; - List types; - private OrcFile.WriterVersion writerVersion; - - - FileInfo(long modificationTime, long size, - List stripeInfos, - Metadata metadata, List types, - ReaderImpl.FileMetaInfo fileMetaInfo, - OrcFile.WriterVersion writerVersion) { - this.modificationTime = modificationTime; - this.size = size; - this.stripeInfos = stripeInfos; - this.fileMetaInfo = fileMetaInfo; - this.metadata = metadata; - this.types = types; - this.writerVersion = writerVersion; - } - } - @SuppressWarnings("unchecked") private org.apache.hadoop.mapred.RecordReader createVectorizedReader(InputSplit split, JobConf conf, Reporter reporter @@ -1199,13 +1166,17 @@ private static void cancelFutures(List> futures) { if (vectorMode) { return createVectorizedReader(inputSplit, conf, reporter); } else { + OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf); + if (inputSplit instanceof OrcSplit) { + OrcSplit split = (OrcSplit) inputSplit; + readerOptions.maxLength(split.getFileLength()).orcTail(split.getOrcTail()); + } return new OrcRecordReader(OrcFile.createReader( ((FileSplit) inputSplit).getPath(), - OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit); + readerOptions), conf, (FileSplit) inputSplit); } } - OrcSplit split = (OrcSplit) inputSplit; reporter.setStatus(inputSplit.toString()); Options options = new Options(conf).reporter(reporter); @@ -1309,7 +1280,12 @@ public float getProgress() throws IOException { if (split.hasBase()) { bucket = AcidUtils.parseBaseBucketFilename(split.getPath(), conf) .getBucket(); - reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); + OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf) + .maxLength(split.getFileLength()); + if (split.hasFooter()) { + readerOptions.orcTail(split.getOrcTail()); + } + reader = OrcFile.createReader(path, readerOptions); } else { bucket = (int) split.getStart(); reader = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java index b58c880..77d6eb5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java @@ -34,7 +34,7 @@ * */ public class OrcNewSplit extends FileSplit { - private ReaderImpl.FileMetaInfo fileMetaInfo; + private OrcTail orcTail; private boolean hasFooter; private boolean isOriginal; private boolean hasBase; @@ -51,7 +51,7 @@ protected OrcNewSplit(){ public OrcNewSplit(OrcSplit inner) throws IOException { super(inner.getPath(), inner.getStart(), inner.getLength(), inner.getLocations()); - this.fileMetaInfo = inner.getFileMetaInfo(); + this.orcTail = inner.getOrcTail(); this.hasFooter = inner.hasFooter(); this.isOriginal = inner.isOriginal(); this.hasBase = inner.hasBase(); @@ -72,20 +72,11 @@ public void write(DataOutput out) throws IOException { delta.write(out); } if (hasFooter) { - // serialize FileMetaInfo fields - Text.writeString(out, fileMetaInfo.compressionType); - WritableUtils.writeVInt(out, fileMetaInfo.bufferSize); - WritableUtils.writeVInt(out, fileMetaInfo.metadataSize); - - // serialize FileMetaInfo field footer - ByteBuffer footerBuff = fileMetaInfo.footerBuffer; - footerBuff.reset(); - - // write length of buffer - WritableUtils.writeVInt(out, footerBuff.limit() - 
footerBuff.position()); - out.write(footerBuff.array(), footerBuff.position(), - footerBuff.limit() - footerBuff.position()); - WritableUtils.writeVInt(out, fileMetaInfo.writerVersion.getId()); + OrcProto.FileTail fileTail = orcTail.getMinimalFileTail(); + byte[] tailBuffer = fileTail.toByteArray(); + int tailLen = tailBuffer.length; + WritableUtils.writeVInt(out, tailLen); + out.write(tailBuffer); } } @@ -107,27 +98,14 @@ public void readFields(DataInput in) throws IOException { deltas.add(dmd); } if (hasFooter) { - // deserialize FileMetaInfo fields - String compressionType = Text.readString(in); - int bufferSize = WritableUtils.readVInt(in); - int metadataSize = WritableUtils.readVInt(in); - - // deserialize FileMetaInfo field footer - int footerBuffSize = WritableUtils.readVInt(in); - ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize); - in.readFully(footerBuff.array(), 0, footerBuffSize); - OrcFile.WriterVersion writerVersion = - ReaderImpl.getWriterVersion(WritableUtils.readVInt(in)); - - fileMetaInfo = new ReaderImpl.FileMetaInfo(compressionType, bufferSize, - metadataSize, footerBuff, writerVersion); + int tailLen = WritableUtils.readVInt(in); + byte[] tailBuffer = new byte[tailLen]; + in.readFully(tailBuffer); + OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer); + orcTail = new OrcTail(fileTail, null); } } - ReaderImpl.FileMetaInfo getFileMetaInfo(){ - return fileMetaInfo; - } - public boolean hasFooter() { return hasFooter; } @@ -143,4 +121,8 @@ public boolean hasBase() { public List getDeltas() { return deltas; } + + public OrcTail getOrcTail() { + return orcTail; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index 3e74df5..43ebdb4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -39,13 +39,14 @@ * */ public class OrcSplit extends FileSplit { - private ReaderImpl.FileMetaInfo fileMetaInfo; + private OrcTail orcTail; private boolean hasFooter; private boolean isOriginal; private boolean hasBase; private final List deltas = new ArrayList<>(); private OrcFile.WriterVersion writerVersion; private long projColsUncompressedSize; + private long fileLen; static final int BASE_FLAG = 4; static final int ORIGINAL_FLAG = 2; @@ -59,15 +60,17 @@ protected OrcSplit(){ } public OrcSplit(Path path, long offset, long length, String[] hosts, - ReaderImpl.FileMetaInfo fileMetaInfo, boolean isOriginal, boolean hasBase, - List deltas, long projectedDataSize) { + OrcTail orcTail, boolean isOriginal, boolean hasBase, + List deltas, long projectedDataSize, long fileLen) { super(path, offset, length, hosts); - this.fileMetaInfo = fileMetaInfo; - hasFooter = this.fileMetaInfo != null; + this.orcTail = orcTail; + hasFooter = this.orcTail != null; this.isOriginal = isOriginal; this.hasBase = hasBase; this.deltas.addAll(deltas); this.projColsUncompressedSize = projectedDataSize; + // setting file length to Long.MAX_VALUE will let orc reader read file length from file system + this.fileLen = fileLen <= 0 ? 
Long.MAX_VALUE : fileLen; } @Override @@ -84,21 +87,13 @@ public void write(DataOutput out) throws IOException { delta.write(out); } if (hasFooter) { - // serialize FileMetaInfo fields - Text.writeString(out, fileMetaInfo.compressionType); - WritableUtils.writeVInt(out, fileMetaInfo.bufferSize); - WritableUtils.writeVInt(out, fileMetaInfo.metadataSize); - - // serialize FileMetaInfo field footer - ByteBuffer footerBuff = fileMetaInfo.footerBuffer; - footerBuff.reset(); - - // write length of buffer - WritableUtils.writeVInt(out, footerBuff.limit() - footerBuff.position()); - out.write(footerBuff.array(), footerBuff.position(), - footerBuff.limit() - footerBuff.position()); - WritableUtils.writeVInt(out, fileMetaInfo.writerVersion.getId()); + OrcProto.FileTail fileTail = orcTail.getMinimalFileTail(); + byte[] tailBuffer = fileTail.toByteArray(); + int tailLen = tailBuffer.length; + WritableUtils.writeVInt(out, tailLen); + out.write(tailBuffer); } + out.writeLong(fileLen); } @Override @@ -119,25 +114,13 @@ public void readFields(DataInput in) throws IOException { deltas.add(dmd); } if (hasFooter) { - // deserialize FileMetaInfo fields - String compressionType = Text.readString(in); - int bufferSize = WritableUtils.readVInt(in); - int metadataSize = WritableUtils.readVInt(in); - - // deserialize FileMetaInfo field footer - int footerBuffSize = WritableUtils.readVInt(in); - ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize); - in.readFully(footerBuff.array(), 0, footerBuffSize); - OrcFile.WriterVersion writerVersion = - ReaderImpl.getWriterVersion(WritableUtils.readVInt(in)); - - fileMetaInfo = new ReaderImpl.FileMetaInfo(compressionType, bufferSize, - metadataSize, footerBuff, writerVersion); + int tailLen = WritableUtils.readVInt(in); + byte[] tailBuffer = new byte[tailLen]; + in.readFully(tailBuffer); + OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer); + orcTail = new OrcTail(fileTail, null); } - } - - ReaderImpl.FileMetaInfo getFileMetaInfo(){ - return fileMetaInfo; + fileLen = in.readLong(); } public boolean hasFooter() { @@ -168,4 +151,19 @@ public boolean isAcid() { public long getProjectedColumnsUncompressedSize() { return projColsUncompressedSize; } + + public long getFileLength() { + return fileLen; + } + + public OrcTail getOrcTail() { + return orcTail; + } + + @Override + public String toString() { + return "OrcSplit [" + getPath() + ", start=" + getStart() + ", length=" + getLength() + + ", isOriginal=" + isOriginal + ", fileLength=" + fileLen + ", hasFooter=" + hasFooter + + ", hasBase=" + hasBase + ", deltas=" + (deltas == null ? 0 : deltas.size()) + "]"; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcTail.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcTail.java new file mode 100644 index 0000000..54ca2f0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcTail.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +public final class OrcTail { + // postscript + footer - Serialized in OrcSplit + private final OrcProto.FileTail fileTail; + // serialized representation of metadata, footer and postscript + private final ByteBuffer serializedTail; + // used to invalidate cache entries + private final long fileModificationTime; + private OrcProto.Metadata metadata; + + public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) { + this(fileTail, serializedTail, -1); + } + + public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail, long fileModificationTime) { + this.fileTail = fileTail; + this.serializedTail = serializedTail; + this.fileModificationTime = fileModificationTime; + this.metadata = null; // lazily deserialized + } + + public ByteBuffer getSerializedTail() { + return serializedTail; + } + + public long getFileModificationTime() { + return fileModificationTime; + } + + public OrcProto.Footer getFooter() { + return fileTail.getFooter(); + } + + public OrcProto.PostScript getPostScript() { + return fileTail.getPostscript(); + } + + public long getFileLength() { + return fileTail.getFileLength(); + } + + public OrcFile.WriterVersion getWriterVersion() { + OrcProto.PostScript ps = fileTail.getPostscript(); + return ReaderImpl.getWriterVersion(ps.getWriterVersion()); + } + + public List getStripes() { + List result = new ArrayList<>(); + for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) { + result.add(new ReaderImpl.StripeInformationImpl(stripeProto)); + } + return result; + } + + public CompressionKind getCompressionKind() { + return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name()); + } + + public CompressionCodec getCompressionCodec() { + return WriterImpl.createCodec(getCompressionKind()); + } + + public int getCompressionBufferSize() { + return (int) fileTail.getPostscript().getCompressionBlockSize(); + } + + public List getStripeStatistics() throws IOException { + List result = new ArrayList<>(); + List ssProto = getStripeStatisticsProto(); + if (ssProto != null) { + for (OrcProto.StripeStatistics ss : ssProto) { + result.add(new StripeStatistics(ss.getColStatsList())); + } + } + return result; + } + + public List getStripeStatisticsProto() throws IOException { + if (getMetadata() == null) { + return null; + } + return getMetadata().getStripeStatsList(); + } + + public int getMetadataSize() { + return (int) getPostScript().getMetadataLength(); + } + + public List getTypes() { + return getFooter().getTypesList(); + } + + public OrcProto.FileTail getFileTail() { + return fileTail; + } + + public OrcProto.FileTail getMinimalFileTail() { + OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail); + OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter()); + footerBuilder.clearStatistics(); + fileTailBuilder.setFooter(footerBuilder.build()); + OrcProto.FileTail result = fileTailBuilder.build(); 
+ return result; + } + + public OrcProto.Metadata getMetadata() throws IOException { + if (serializedTail == null) return null; + if (metadata == null) { + metadata = ReaderImpl.extractMetadata(serializedTail, 0, + (int) fileTail.getPostscript().getMetadataLength(), + getCompressionCodec(), getCompressionBufferSize()); + } + return metadata; + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java index 6dbe461..3105738 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java @@ -146,6 +146,10 @@ */ OrcFile.WriterVersion getWriterVersion(); + OrcProto.FileTail getFileTail(); + + ByteBuffer getSerializedFileFooter(); + /** * Options for creating a RecordReader. */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java index d42675c..1b7f6d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.io.orc; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -30,6 +31,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.DiskRange; @@ -72,6 +74,7 @@ // will help avoid cpu cycles spend in deserializing at cost of increased // memory footprint. 
private final ByteBuffer footerByteBuffer; + private final OrcTail tail; static class StripeInformationImpl implements StripeInformation { @@ -308,29 +311,23 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException { this.path = path; this.conf = options.getConfiguration(); - FileMetaInfo footerMetaData; - if (options.getFileMetaInfo() != null) { - footerMetaData = options.getFileMetaInfo(); + OrcTail orcTail = options.getOrcTail(); + if (orcTail == null) { + tail = extractFileTail(fs, path, options.getMaxLength()); + options.orcTail(tail); } else { - footerMetaData = extractMetaInfoFromFooter(fs, path, - options.getMaxLength()); + tail = orcTail; } - MetaInfoObjExtractor rInfo = - new MetaInfoObjExtractor(footerMetaData.compressionType, - footerMetaData.bufferSize, - footerMetaData.metadataSize, - footerMetaData.footerBuffer - ); - this.footerByteBuffer = footerMetaData.footerBuffer; - this.compressionKind = rInfo.compressionKind; - this.codec = rInfo.codec; - this.bufferSize = rInfo.bufferSize; - this.metadataSize = rInfo.metadataSize; - this.metadata = rInfo.metadata; - this.footer = rInfo.footer; - this.inspector = rInfo.inspector; - this.versionList = footerMetaData.versionList; - this.writerVersion = footerMetaData.writerVersion; + this.footerByteBuffer = tail.getSerializedTail(); + this.compressionKind = tail.getCompressionKind(); + this.codec = tail.getCompressionCodec(); + this.bufferSize = tail.getCompressionBufferSize(); + this.metadataSize = tail.getMetadataSize(); + this.versionList = tail.getPostScript().getVersionList(); + this.footer = tail.getFooter(); + this.writerVersion = tail.getWriterVersion(); + this.metadata = tail.getMetadata(); + this.inspector = OrcStruct.createObjectInspector(0, tail.getTypes()); } /** @@ -347,7 +344,7 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException { return OrcFile.WriterVersion.ORIGINAL; } - private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, + private static OrcTail extractFileTail(FileSystem fs, Path path, long maxFileLength ) throws IOException { @@ -355,14 +352,20 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, ByteBuffer buffer = null; OrcProto.PostScript ps = null; OrcFile.WriterVersion writerVersion = null; + long modificationTime; + OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(); try { // figure out the size of the file using the option or filesystem long size; if (maxFileLength == Long.MAX_VALUE) { - size = fs.getFileStatus(path).getLen(); + FileStatus fileStatus = fs.getFileStatus(path); + size = fileStatus.getLen(); + modificationTime = fileStatus.getModificationTime(); } else { size = maxFileLength; + modificationTime = -1; } + fileTailBuilder.setFileLength(size); //read last bytes into buffer to get PostScript int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS); @@ -416,6 +419,8 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, buffer = extraBuf; buffer.position(0); buffer.limit(footerSize + metadataSize); + readSize += extra; + psOffset = readSize - 1 - psLen; } else { //footer is already in the bytes in buffer, just adjust position, length buffer.position(psOffset - footerSize - metadataSize); @@ -424,6 +429,17 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, // remember position for later buffer.mark(); + int bufferSize = (int) ps.getCompressionBlockSize(); + CompressionCodec codec = 
WriterImpl.createCodec(CompressionKind.valueOf(ps.getCompression().name())); + fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps); + int footerOffset = psOffset - footerSize; + buffer.position(footerOffset); + ByteBuffer footerBuffer = buffer.slice(); + buffer.reset(); + OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize, + codec, bufferSize); + fileTailBuilder.setFooter(footer); + } finally { try { file.close(); @@ -431,101 +447,30 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, LOG.error("Failed to close the file after another error", ex); } } - - return new FileMetaInfo( - ps.getCompression().toString(), - (int) ps.getCompressionBlockSize(), - (int) ps.getMetadataLength(), - buffer, - ps.getVersionList(), - writerVersion - ); - } - - - - /** - * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl - * from serialized fields. - * As the fields are final, the fields need to be initialized in the constructor and - * can't be done in some helper function. So this helper class is used instead. - * - */ - private static class MetaInfoObjExtractor{ - final CompressionKind compressionKind; - final CompressionCodec codec; - final int bufferSize; - final int metadataSize; - final OrcProto.Metadata metadata; - final OrcProto.Footer footer; - final ObjectInspector inspector; - - MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, - ByteBuffer footerBuffer) throws IOException { - - this.compressionKind = CompressionKind.valueOf(codecStr.toUpperCase()); - this.bufferSize = bufferSize; - this.codec = WriterImpl.createCodec(compressionKind); - this.metadataSize = metadataSize; - - int position = footerBuffer.position(); - int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize; - footerBuffer.limit(position + metadataSize); - - CodedInputStream instream = InStream.createCodedInputStream("metadata", - Lists.newArrayList(new BufferChunk(footerBuffer, 0)), metadataSize, - codec, bufferSize); - this.metadata = OrcProto.Metadata.parseFrom(instream); - - footerBuffer.position(position + metadataSize); - footerBuffer.limit(position + metadataSize + footerBufferSize); - instream = InStream.createCodedInputStream("footer", Lists.newArrayList( - new BufferChunk(footerBuffer, 0)), footerBufferSize, codec, bufferSize); - this.footer = OrcProto.Footer.parseFrom(instream); - - footerBuffer.position(position); - this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList()); - } + return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime); } - /** - * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file - * that is useful for Reader implementation - * - */ - static class FileMetaInfo{ - final String compressionType; - final int bufferSize; - final int metadataSize; - final ByteBuffer footerBuffer; - final List versionList; - final OrcFile.WriterVersion writerVersion; - - FileMetaInfo(String compressionType, int bufferSize, int metadataSize, - ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) { - this(compressionType, bufferSize, metadataSize, footerBuffer, null, - writerVersion); - } - - FileMetaInfo(String compressionType, int bufferSize, int metadataSize, - ByteBuffer footerBuffer, List versionList, - OrcFile.WriterVersion writerVersion){ - this.compressionType = compressionType; - this.bufferSize = bufferSize; - this.metadataSize = metadataSize; - this.footerBuffer = footerBuffer; - this.versionList = 
versionList; - this.writerVersion = writerVersion; - } + private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos, + int footerSize, CompressionCodec codec, + int bufferSize) throws IOException { + bb.position(footerAbsPos); + bb.limit(footerAbsPos + footerSize); + InputStream instream = InStream.create("footer", + Lists.newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize); + CodedInputStream in = CodedInputStream.newInstance(instream); + return OrcProto.Footer.parseFrom(in); } - public FileMetaInfo getFileMetaInfo(){ - return new FileMetaInfo(compressionKind.toString(), bufferSize, - metadataSize, footerByteBuffer, versionList, writerVersion); + public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos, + int metadataSize, CompressionCodec codec, int bufferSize) throws IOException { + bb.position(metadataAbsPos); + bb.limit(metadataAbsPos + metadataSize); + InputStream inputStream = InStream.create("metadata", + Lists.newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize); + CodedInputStream in = CodedInputStream.newInstance(inputStream); + return OrcProto.Metadata.parseFrom(in); } - - @Override public RecordReader rows() throws IOException { return rowsOptions(new Options()); @@ -577,24 +522,34 @@ public long getRawDataSize() { for (int i = 0; i < stats.size(); ++i) { indices.add(i); } - deserializedSize = getRawDataSizeFromColIndices(indices); + deserializedSize = getRawDataSizeFromColIndices(indices, footer.getTypesList(), stats); } return deserializedSize; } @Override - public long getRawDataSizeFromColIndices(List colIndices) { + public OrcProto.FileTail getFileTail() { + return tail.getFileTail(); + } + + @Override + public ByteBuffer getSerializedFileFooter() { + return tail.getSerializedTail(); + } + + public static long getRawDataSizeFromColIndices(List colIndices, List types, + List stats) { long result = 0; for (int colIdx : colIndices) { - result += getRawDataSizeOfColumn(colIdx); + result += getRawDataSizeOfColumn(colIdx, types, stats); } return result; } - private long getRawDataSizeOfColumn(int colIdx) { - OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx); + private static long getRawDataSizeOfColumn(int colIdx, List types, List stats) { + OrcProto.ColumnStatistics colStat = stats.get(colIdx); long numVals = colStat.getNumberOfValues(); - Type type = footer.getTypes(colIdx); + Type type = types.get(colIdx); switch (type.getKind()) { case BINARY: @@ -638,7 +593,12 @@ private long getRawDataSizeOfColumn(int colIdx) { @Override public long getRawDataSizeOfColumns(List colNames) { List colIndices = getColumnIndicesFromNames(colNames); - return getRawDataSizeFromColIndices(colIndices); + return getRawDataSizeFromColIndices(colIndices, tail.getTypes(), tail.getFooter().getStatisticsList()); + } + + @Override + public long getRawDataSizeFromColIndices(List colIds) { + return getRawDataSizeFromColIndices(colIds, tail.getTypes(), tail.getFooter().getStatisticsList()); } private List getColumnIndicesFromNames(List colNames) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java index 64b426c..330ac12 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java @@ -168,8 +168,9 @@ public VectorizedOrcInputFormat() { if(fSplit instanceof OrcSplit){ OrcSplit orcSplit = (OrcSplit) 
fSplit; if (orcSplit.hasFooter()) { - opts.fileMetaInfo(orcSplit.getFileMetaInfo()); + opts.orcTail(orcSplit.getOrcTail()); } + opts.maxLength(orcSplit.getFileLength()); } Reader reader = OrcFile.createReader(path, opts); return new VectorizedOrcRecordReader(reader, conf, fSplit); diff --git a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto index 06d0b07..5505efe 100644 --- a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto +++ b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto @@ -220,3 +220,11 @@ message PostScript { // Leave this last in the record optional string magic = 8000; } + +// This gets serialized as part of OrcSplit, also used by footer cache. +message FileTail { + optional PostScript postscript = 1; + optional Footer footer = 2; + optional uint64 fileLength = 3; + optional uint64 postscriptLength = 4; +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 3e7565e..31d561b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.DataInput; @@ -33,9 +34,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.TimeZone; @@ -66,14 +69,12 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; -import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy; -import org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.MyRow; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; @@ -112,9 +113,6 @@ import org.junit.Test; import org.junit.rules.TestName; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Output; - public class TestInputOutputFormat { Path workDir = new Path(System.getProperty("test.tmp.dir","target/tmp")); @@ -714,6 +712,17 @@ public MockFile(String path, int blockSize, byte[] content, } @Override + public int hashCode() { + return path.hashCode() + 31 * length; + } + + @Override + public boolean equals(final Object obj) { + if (!(obj instanceof MockFile)) { return false; } + return ((MockFile) obj).path.equals(this.path) && ((MockFile) obj).length == this.length; + } + + @Override public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append("mockFile{path: "); @@ -824,6 +833,7 @@ public String toString() { public static class MockFileSystem extends FileSystem { final List files = new 
ArrayList(); + final Map fileStatusMap = new HashMap<>(); Path workingDir = new Path("/"); protected Statistics statistics; @@ -857,6 +867,19 @@ public URI getUri() { } } + // increments file modification time + public void touch(MockFile file) { + if (fileStatusMap.containsKey(file)) { + FileStatus fileStatus = fileStatusMap.get(file); + FileStatus fileStatusNew = new FileStatus(fileStatus.getLen(), fileStatus.isDirectory(), + fileStatus.getReplication(), fileStatus.getBlockSize(), + fileStatus.getModificationTime() + 1, fileStatus.getAccessTime(), + fileStatus.getPermission(), fileStatus.getOwner(), fileStatus.getGroup(), + fileStatus.getPath()); + fileStatusMap.put(file, fileStatusNew); + } + } + @Override public FSDataInputStream open(Path path, int i) throws IOException { statistics.incrementReadOps(1); @@ -1038,9 +1061,14 @@ private LocatedFileStatus createLocatedDirectory(Path dir) throws IOException { } private FileStatus createStatus(MockFile file) { - return new FileStatus(file.length, false, 1, file.blockSize, 0, 0, + if (fileStatusMap.containsKey(file)) { + return fileStatusMap.get(file); + } + FileStatus fileStatus = new FileStatus(file.length, false, 1, file.blockSize, 0, 0, FsPermission.createImmutable((short) 644), "owen", "group", file.path); + fileStatusMap.put(file, fileStatus); + return fileStatus; } private FileStatus createDirectory(Path dir) { @@ -2286,4 +2314,935 @@ public void testSplitGenReadOps() throws Exception { // revert back to local fs conf.set("fs.defaultFS", "file:///"); } + + @Test + public void testSplitGenReadOpsLocalCache() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + // creates the static cache + MockPath mockPath = new MockPath(fs, "mock:///mocktbl"); + conf.set("hive.orc.cache.stripe.details.size", "-1"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. 
Read ops not expected to be -1", readOpsBefore != -1); + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl + // call-2: open - mock:/mocktbl/0_0 + // call-3: open - mock:/mocktbl/0_1 + assertEquals(3, readOpsDelta); + + // force BI to avoid reading footers + conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI"); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl + assertEquals(1, readOpsDelta); + + // enable cache and use default strategy + conf.set("hive.orc.cache.stripe.details.size", "100"); + conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "HYBRID"); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl + // call-2: open - mock:/mocktbl/0_0 + // call-3: open - mock:/mocktbl/0_1 + assertEquals(3, readOpsDelta); + + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl + assertEquals(1, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testSplitGenReadOpsLocalCacheChangeFileLen() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + // creates the static cache + MockPath mockPath = new MockPath(fs, "mock:///mocktbl1"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for 
(int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1); + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktable + // call-2: open - mock:/mocktbl1/0_0 + // call-3: open - mock:/mocktbl1/0_1 + assertEquals(3, readOpsDelta); + + // change file length and look for cache misses + + fs.clear(); + + writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 100; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 100; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktable + // call-2: open - mock:/mocktbl1/0_0 + // call-3: open - mock:/mocktbl1/0_1 + assertEquals(3, readOpsDelta); + + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl1 + assertEquals(1, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testSplitGenReadOpsLocalCacheChangeModificationTime() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + // creates the static cache + MockPath mockPath = new MockPath(fs, "mock:///mocktbl2"); + conf.set("hive.orc.cache.use.soft.references", "true"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + 
synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1); + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl2 + // call-2: open - mock:/mocktbl2/0_0 + // call-3: open - mock:/mocktbl2/0_1 + assertEquals(3, readOpsDelta); + + // change file modification time and look for cache misses + FileSystem fs1 = FileSystem.get(conf); + MockFile mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_0")); + ((MockFileSystem) fs1).touch(mockFile); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl2 + // call-2: open - mock:/mocktbl2/0_1 + assertEquals(2, readOpsDelta); + + // touch the next file + fs1 = FileSystem.get(conf); + mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_1")); + ((MockFileSystem) fs1).touch(mockFile); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl2 + // call-2: open - mock:/mocktbl2/0_0 + assertEquals(2, readOpsDelta); + + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + orcInputFormat = new OrcInputFormat(); + splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + for (FileSystem.Statistics statistics : 
FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: listLocatedStatus - mock:/mocktbl2 + assertEquals(1, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testNonVectorReaderNoFooterSerialize() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable1"); + conf.set("hive.orc.splits.include.file.footer", "false"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. 
Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=false")); + assertTrue(split.toString().contains("hasBase=true")); + assertTrue(split.toString().contains("deltas=0")); + if (split instanceof OrcSplit) { + assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, conf, null); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read footer - split 1 => mock:/mocktable1/0_0 + // call-2: open to read data - split 1 => mock:/mocktable1/0_0 + // call-3: open to read footer - split 2 => mock:/mocktable1/0_1 + // call-4: open to read data - split 2 => mock:/mocktable1/0_1 + assertEquals(4, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testNonVectorReaderFooterSerialize() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable2"); + conf.set("hive.orc.splits.include.file.footer", "true"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. 
Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=true")); + assertTrue(split.toString().contains("hasBase=true")); + assertTrue(split.toString().contains("deltas=0")); + if (split instanceof OrcSplit) { + assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, conf, null); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read data - split 1 => mock:/mocktable2/0_0 + // call-2: open to read data - split 2 => mock:/mocktable2/0_1 + assertEquals(2, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testVectorReaderNoFooterSerialize() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable3"); + conf.set("hive.orc.splits.include.file.footer", "false"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"), + "mocktable3", inspector, true, 0); + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. 
Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=false")); + assertTrue(split.toString().contains("hasBase=true")); + assertTrue(split.toString().contains("deltas=0")); + if (split instanceof OrcSplit) { + assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read footer - split 1 => mock:/mocktable3/0_0 + // call-2: open to read data - split 1 => mock:/mocktable3/0_0 + // call-3: open to read footer - split 2 => mock:/mocktable3/0_1 + // call-4: open to read data - split 2 => mock:/mocktable3/0_1 + assertEquals(4, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testVectorReaderFooterSerialize() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable4"); + conf.set("hive.orc.splits.include.file.footer", "true"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"), + "mocktable4", inspector, true, 0); + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. 
Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=true")); + assertTrue(split.toString().contains("hasBase=true")); + assertTrue(split.toString().contains("deltas=0")); + if (split instanceof OrcSplit) { + assertTrue("Footer serialize test for vector reader, hasFooter is expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read data - split 1 => mock:/mocktable4/0_0 + // call-2: open to read data - split 2 => mock:/mocktable4/0_1 + assertEquals(2, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testACIDReaderNoFooterSerialize() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable5"); + conf.set("hive.transactional.table.scan", "true"); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); + conf.set("hive.orc.splits.include.file.footer", "false"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. 
Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=false")); + assertTrue(split.toString().contains("hasBase=true")); + assertTrue(split.toString().contains("deltas=0")); + if (split instanceof OrcSplit) { + assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, conf, Reporter.NULL); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read footer - split 1 => mock:/mocktable5/0_0 + // call-2: open to read data - split 1 => mock:/mocktable5/0_0 + // call-3: open to read footer - split 2 => mock:/mocktable5/0_1 + // call-4: open to read data - split 2 => mock:/mocktable5/0_1 + assertEquals(4, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testACIDReaderFooterSerialize() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable6"); + conf.set("hive.transactional.table.scan", "true"); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); + conf.set("hive.orc.splits.include.file.footer", "true"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(mockPath + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(2, splits.length); + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. 
Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=true")); + assertTrue(split.toString().contains("hasBase=true")); + assertTrue(split.toString().contains("deltas=0")); + if (split instanceof OrcSplit) { + assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, conf, Reporter.NULL); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read data - split 1 => mock:/mocktable6/0_0 + // call-2: open to read data - split 2 => mock:/mocktable6/0_1 + assertEquals(2, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testACIDReaderNoFooterSerializeWithDeltas() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable7"); + conf.set("hive.transactional.table.scan", "true"); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); + conf.set("hive.orc.splits.include.file.footer", "false"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(new Path(mockPath + "/delta_001_002") + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(1, splits.length); + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=false")); + assertTrue(split.toString().contains("hasBase=true")); + // NOTE: don't be surprised if deltas value is different + // in older release deltas=2 as min and max transaction are added separately to delta list. 
+ // in newer release since both of them are put together deltas=1 + assertTrue(split.toString().contains("deltas=1")); + if (split instanceof OrcSplit) { + assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, conf, Reporter.NULL); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read footer - split 1 => mock:/mocktable7/0_0 + // call-2: open to read data - split 1 => mock:/mocktable7/0_0 + // call-3: open side file (flush length) of delta directory + // call-4: fs.exists() check for delta_xxx_xxx/bucket_00000 file + assertEquals(4, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } + + @Test + public void testACIDReaderFooterSerializeWithDeltas() throws Exception { + MockFileSystem fs = new MockFileSystem(conf); + MockPath mockPath = new MockPath(fs, "mock:///mocktable8"); + conf.set("hive.transactional.table.scan", "true"); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); + conf.set("hive.orc.splits.include.file.footer", "true"); + conf.set("mapred.input.dir", mockPath.toString()); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + StructObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = (StructObjectInspector) + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + Writer writer = + OrcFile.createWriter(new Path(mockPath + "/0_0"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + writer = OrcFile.createWriter(new Path(new Path(mockPath + "/delta_001_002") + "/0_1"), + OrcFile.writerOptions(conf).blockPadding(false) + .bufferSize(1024).inspector(inspector)); + for (int i = 0; i < 10; ++i) { + writer.addRow(new MyRow(i, 2 * i)); + } + writer.close(); + + OrcInputFormat orcInputFormat = new OrcInputFormat(); + InputSplit[] splits = orcInputFormat.getSplits(conf, 2); + assertEquals(1, splits.length); + int readOpsBefore = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsBefore = statistics.getReadOps(); + } + } + assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1); + + for (InputSplit split : splits) { + assertTrue("OrcSplit is expected", split instanceof OrcSplit); + // ETL strategies will have start=3 (start of first stripe) + assertTrue(split.toString().contains("start=3")); + assertTrue(split.toString().contains("hasFooter=true")); + assertTrue(split.toString().contains("hasBase=true")); + // NOTE: don't be surprised if deltas value is different + // in older release deltas=2 as min and max transaction are added separately to delta list. 
+ // in newer release since both of them are put together deltas=1 + assertTrue(split.toString().contains("deltas=1")); + if (split instanceof OrcSplit) { + assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); + } + orcInputFormat.getRecordReader(split, conf, Reporter.NULL); + } + + int readOpsDelta = -1; + for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { + if (statistics.getScheme().equalsIgnoreCase("mock")) { + readOpsDelta = statistics.getReadOps() - readOpsBefore; + } + } + // call-1: open to read data - split 1 => mock:/mocktable8/0_0 + // call-2: open side file (flush length) of delta directory + // call-3: fs.exists() check for delta_xxx_xxx/bucket_00000 file + assertEquals(3, readOpsDelta); + + // revert back to local fs + conf.set("fs.defaultFS", "file:///"); + } }