diff --git a/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java b/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java
index 24715c3..52ea5d7 100644
--- a/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java
+++ b/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java
@@ -18940,6 +18940,875 @@ public Builder setMagicBytes(
// @@protoc_insertion_point(class_scope:orc.proto.PostScript)
}
+ public interface FileTailOrBuilder
+ extends com.google.protobuf.MessageOrBuilder {
+
+ // optional .orc.proto.PostScript postscript = 1;
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ boolean hasPostscript();
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ org.apache.orc.OrcProto.PostScript getPostscript();
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ org.apache.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder();
+
+ // optional .orc.proto.Footer footer = 2;
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ boolean hasFooter();
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ org.apache.orc.OrcProto.Footer getFooter();
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ org.apache.orc.OrcProto.FooterOrBuilder getFooterOrBuilder();
+
+ // optional uint64 fileLength = 3;
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ boolean hasFileLength();
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ long getFileLength();
+
+ // optional uint64 postscriptLength = 4;
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ boolean hasPostscriptLength();
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ long getPostscriptLength();
+ }
+ /**
+ * Protobuf type {@code orc.proto.FileTail}
+ *
+ * <pre>
+ * The contents of the file tail that must be serialized.
+ * </pre>
+ */
+ public static final class FileTail extends
+ com.google.protobuf.GeneratedMessage
+ implements FileTailOrBuilder {
+ // Use FileTail.newBuilder() to construct.
+ private FileTail(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+ super(builder);
+ this.unknownFields = builder.getUnknownFields();
+ }
+ private FileTail(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+ private static final FileTail defaultInstance;
+ public static FileTail getDefaultInstance() {
+ return defaultInstance;
+ }
+
+ public FileTail getDefaultInstanceForType() {
+ return defaultInstance;
+ }
+
+ private final com.google.protobuf.UnknownFieldSet unknownFields;
+ @java.lang.Override
+ public final com.google.protobuf.UnknownFieldSet
+ getUnknownFields() {
+ return this.unknownFields;
+ }
+ private FileTail(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ initFields();
+ int mutable_bitField0_ = 0;
+ com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+ com.google.protobuf.UnknownFieldSet.newBuilder();
+ try {
+ boolean done = false;
+ while (!done) {
+ int tag = input.readTag();
+ switch (tag) {
+ case 0:
+ done = true;
+ break;
+ default: {
+ if (!parseUnknownField(input, unknownFields,
+ extensionRegistry, tag)) {
+ done = true;
+ }
+ break;
+ }
+ case 10: {
+ org.apache.orc.OrcProto.PostScript.Builder subBuilder = null;
+ if (((bitField0_ & 0x00000001) == 0x00000001)) {
+ subBuilder = postscript_.toBuilder();
+ }
+ postscript_ = input.readMessage(org.apache.orc.OrcProto.PostScript.PARSER, extensionRegistry);
+ if (subBuilder != null) {
+ subBuilder.mergeFrom(postscript_);
+ postscript_ = subBuilder.buildPartial();
+ }
+ bitField0_ |= 0x00000001;
+ break;
+ }
+ case 18: {
+ org.apache.orc.OrcProto.Footer.Builder subBuilder = null;
+ if (((bitField0_ & 0x00000002) == 0x00000002)) {
+ subBuilder = footer_.toBuilder();
+ }
+ footer_ = input.readMessage(org.apache.orc.OrcProto.Footer.PARSER, extensionRegistry);
+ if (subBuilder != null) {
+ subBuilder.mergeFrom(footer_);
+ footer_ = subBuilder.buildPartial();
+ }
+ bitField0_ |= 0x00000002;
+ break;
+ }
+ case 24: {
+ bitField0_ |= 0x00000004;
+ fileLength_ = input.readUInt64();
+ break;
+ }
+ case 32: {
+ bitField0_ |= 0x00000008;
+ postscriptLength_ = input.readUInt64();
+ break;
+ }
+ }
+ }
+ } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+ throw e.setUnfinishedMessage(this);
+ } catch (java.io.IOException e) {
+ throw new com.google.protobuf.InvalidProtocolBufferException(
+ e.getMessage()).setUnfinishedMessage(this);
+ } finally {
+ this.unknownFields = unknownFields.build();
+ makeExtensionsImmutable();
+ }
+ }
+ public static final com.google.protobuf.Descriptors.Descriptor
+ getDescriptor() {
+ return org.apache.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor;
+ }
+
+ protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+ internalGetFieldAccessorTable() {
+ return org.apache.orc.OrcProto.internal_static_orc_proto_FileTail_fieldAccessorTable
+ .ensureFieldAccessorsInitialized(
+ org.apache.orc.OrcProto.FileTail.class, org.apache.orc.OrcProto.FileTail.Builder.class);
+ }
+
+ public static com.google.protobuf.Parser<FileTail> PARSER =
+ new com.google.protobuf.AbstractParser<FileTail>() {
+ public FileTail parsePartialFrom(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return new FileTail(input, extensionRegistry);
+ }
+ };
+
+ @java.lang.Override
+ public com.google.protobuf.Parser<FileTail> getParserForType() {
+ return PARSER;
+ }
+
+ private int bitField0_;
+ // optional .orc.proto.PostScript postscript = 1;
+ public static final int POSTSCRIPT_FIELD_NUMBER = 1;
+ private org.apache.orc.OrcProto.PostScript postscript_;
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public boolean hasPostscript() {
+ return ((bitField0_ & 0x00000001) == 0x00000001);
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public org.apache.orc.OrcProto.PostScript getPostscript() {
+ return postscript_;
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public org.apache.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder() {
+ return postscript_;
+ }
+
+ // optional .orc.proto.Footer footer = 2;
+ public static final int FOOTER_FIELD_NUMBER = 2;
+ private org.apache.orc.OrcProto.Footer footer_;
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public boolean hasFooter() {
+ return ((bitField0_ & 0x00000002) == 0x00000002);
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public org.apache.orc.OrcProto.Footer getFooter() {
+ return footer_;
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public org.apache.orc.OrcProto.FooterOrBuilder getFooterOrBuilder() {
+ return footer_;
+ }
+
+ // optional uint64 fileLength = 3;
+ public static final int FILELENGTH_FIELD_NUMBER = 3;
+ private long fileLength_;
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ public boolean hasFileLength() {
+ return ((bitField0_ & 0x00000004) == 0x00000004);
+ }
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ public long getFileLength() {
+ return fileLength_;
+ }
+
+ // optional uint64 postscriptLength = 4;
+ public static final int POSTSCRIPTLENGTH_FIELD_NUMBER = 4;
+ private long postscriptLength_;
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ public boolean hasPostscriptLength() {
+ return ((bitField0_ & 0x00000008) == 0x00000008);
+ }
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ public long getPostscriptLength() {
+ return postscriptLength_;
+ }
+
+ private void initFields() {
+ postscript_ = org.apache.orc.OrcProto.PostScript.getDefaultInstance();
+ footer_ = org.apache.orc.OrcProto.Footer.getDefaultInstance();
+ fileLength_ = 0L;
+ postscriptLength_ = 0L;
+ }
+ private byte memoizedIsInitialized = -1;
+ public final boolean isInitialized() {
+ byte isInitialized = memoizedIsInitialized;
+ if (isInitialized != -1) return isInitialized == 1;
+
+ memoizedIsInitialized = 1;
+ return true;
+ }
+
+ public void writeTo(com.google.protobuf.CodedOutputStream output)
+ throws java.io.IOException {
+ getSerializedSize();
+ if (((bitField0_ & 0x00000001) == 0x00000001)) {
+ output.writeMessage(1, postscript_);
+ }
+ if (((bitField0_ & 0x00000002) == 0x00000002)) {
+ output.writeMessage(2, footer_);
+ }
+ if (((bitField0_ & 0x00000004) == 0x00000004)) {
+ output.writeUInt64(3, fileLength_);
+ }
+ if (((bitField0_ & 0x00000008) == 0x00000008)) {
+ output.writeUInt64(4, postscriptLength_);
+ }
+ getUnknownFields().writeTo(output);
+ }
+
+ private int memoizedSerializedSize = -1;
+ public int getSerializedSize() {
+ int size = memoizedSerializedSize;
+ if (size != -1) return size;
+
+ size = 0;
+ if (((bitField0_ & 0x00000001) == 0x00000001)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeMessageSize(1, postscript_);
+ }
+ if (((bitField0_ & 0x00000002) == 0x00000002)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeMessageSize(2, footer_);
+ }
+ if (((bitField0_ & 0x00000004) == 0x00000004)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeUInt64Size(3, fileLength_);
+ }
+ if (((bitField0_ & 0x00000008) == 0x00000008)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeUInt64Size(4, postscriptLength_);
+ }
+ size += getUnknownFields().getSerializedSize();
+ memoizedSerializedSize = size;
+ return size;
+ }
+
+ private static final long serialVersionUID = 0L;
+ @java.lang.Override
+ protected java.lang.Object writeReplace()
+ throws java.io.ObjectStreamException {
+ return super.writeReplace();
+ }
+
+ public static org.apache.orc.OrcProto.FileTail parseFrom(
+ com.google.protobuf.ByteString data)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseFrom(
+ com.google.protobuf.ByteString data,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data, extensionRegistry);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseFrom(byte[] data)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseFrom(
+ byte[] data,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws com.google.protobuf.InvalidProtocolBufferException {
+ return PARSER.parseFrom(data, extensionRegistry);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseFrom(java.io.InputStream input)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseFrom(
+ java.io.InputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input, extensionRegistry);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseDelimitedFrom(java.io.InputStream input)
+ throws java.io.IOException {
+ return PARSER.parseDelimitedFrom(input);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseDelimitedFrom(
+ java.io.InputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ return PARSER.parseDelimitedFrom(input, extensionRegistry);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseFrom(
+ com.google.protobuf.CodedInputStream input)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input);
+ }
+ public static org.apache.orc.OrcProto.FileTail parseFrom(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ return PARSER.parseFrom(input, extensionRegistry);
+ }
+
+ public static Builder newBuilder() { return Builder.create(); }
+ public Builder newBuilderForType() { return newBuilder(); }
+ public static Builder newBuilder(org.apache.orc.OrcProto.FileTail prototype) {
+ return newBuilder().mergeFrom(prototype);
+ }
+ public Builder toBuilder() { return newBuilder(this); }
+
+ @java.lang.Override
+ protected Builder newBuilderForType(
+ com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+ Builder builder = new Builder(parent);
+ return builder;
+ }
+ /**
+ * Protobuf type {@code orc.proto.FileTail}
+ *
+ * <pre>
+ * The contents of the file tail that must be serialized.
+ * </pre>
+ */
+ public static final class Builder extends
+ com.google.protobuf.GeneratedMessage.Builder<Builder>
+ implements org.apache.orc.OrcProto.FileTailOrBuilder {
+ public static final com.google.protobuf.Descriptors.Descriptor
+ getDescriptor() {
+ return org.apache.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor;
+ }
+
+ protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+ internalGetFieldAccessorTable() {
+ return org.apache.orc.OrcProto.internal_static_orc_proto_FileTail_fieldAccessorTable
+ .ensureFieldAccessorsInitialized(
+ org.apache.orc.OrcProto.FileTail.class, org.apache.orc.OrcProto.FileTail.Builder.class);
+ }
+
+ // Construct using org.apache.orc.OrcProto.FileTail.newBuilder()
+ private Builder() {
+ maybeForceBuilderInitialization();
+ }
+
+ private Builder(
+ com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+ super(parent);
+ maybeForceBuilderInitialization();
+ }
+ private void maybeForceBuilderInitialization() {
+ if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
+ getPostscriptFieldBuilder();
+ getFooterFieldBuilder();
+ }
+ }
+ private static Builder create() {
+ return new Builder();
+ }
+
+ public Builder clear() {
+ super.clear();
+ if (postscriptBuilder_ == null) {
+ postscript_ = org.apache.orc.OrcProto.PostScript.getDefaultInstance();
+ } else {
+ postscriptBuilder_.clear();
+ }
+ bitField0_ = (bitField0_ & ~0x00000001);
+ if (footerBuilder_ == null) {
+ footer_ = org.apache.orc.OrcProto.Footer.getDefaultInstance();
+ } else {
+ footerBuilder_.clear();
+ }
+ bitField0_ = (bitField0_ & ~0x00000002);
+ fileLength_ = 0L;
+ bitField0_ = (bitField0_ & ~0x00000004);
+ postscriptLength_ = 0L;
+ bitField0_ = (bitField0_ & ~0x00000008);
+ return this;
+ }
+
+ public Builder clone() {
+ return create().mergeFrom(buildPartial());
+ }
+
+ public com.google.protobuf.Descriptors.Descriptor
+ getDescriptorForType() {
+ return org.apache.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor;
+ }
+
+ public org.apache.orc.OrcProto.FileTail getDefaultInstanceForType() {
+ return org.apache.orc.OrcProto.FileTail.getDefaultInstance();
+ }
+
+ public org.apache.orc.OrcProto.FileTail build() {
+ org.apache.orc.OrcProto.FileTail result = buildPartial();
+ if (!result.isInitialized()) {
+ throw newUninitializedMessageException(result);
+ }
+ return result;
+ }
+
+ public org.apache.orc.OrcProto.FileTail buildPartial() {
+ org.apache.orc.OrcProto.FileTail result = new org.apache.orc.OrcProto.FileTail(this);
+ int from_bitField0_ = bitField0_;
+ int to_bitField0_ = 0;
+ if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
+ to_bitField0_ |= 0x00000001;
+ }
+ if (postscriptBuilder_ == null) {
+ result.postscript_ = postscript_;
+ } else {
+ result.postscript_ = postscriptBuilder_.build();
+ }
+ if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
+ to_bitField0_ |= 0x00000002;
+ }
+ if (footerBuilder_ == null) {
+ result.footer_ = footer_;
+ } else {
+ result.footer_ = footerBuilder_.build();
+ }
+ if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+ to_bitField0_ |= 0x00000004;
+ }
+ result.fileLength_ = fileLength_;
+ if (((from_bitField0_ & 0x00000008) == 0x00000008)) {
+ to_bitField0_ |= 0x00000008;
+ }
+ result.postscriptLength_ = postscriptLength_;
+ result.bitField0_ = to_bitField0_;
+ onBuilt();
+ return result;
+ }
+
+ public Builder mergeFrom(com.google.protobuf.Message other) {
+ if (other instanceof org.apache.orc.OrcProto.FileTail) {
+ return mergeFrom((org.apache.orc.OrcProto.FileTail)other);
+ } else {
+ super.mergeFrom(other);
+ return this;
+ }
+ }
+
+ public Builder mergeFrom(org.apache.orc.OrcProto.FileTail other) {
+ if (other == org.apache.orc.OrcProto.FileTail.getDefaultInstance()) return this;
+ if (other.hasPostscript()) {
+ mergePostscript(other.getPostscript());
+ }
+ if (other.hasFooter()) {
+ mergeFooter(other.getFooter());
+ }
+ if (other.hasFileLength()) {
+ setFileLength(other.getFileLength());
+ }
+ if (other.hasPostscriptLength()) {
+ setPostscriptLength(other.getPostscriptLength());
+ }
+ this.mergeUnknownFields(other.getUnknownFields());
+ return this;
+ }
+
+ public final boolean isInitialized() {
+ return true;
+ }
+
+ public Builder mergeFrom(
+ com.google.protobuf.CodedInputStream input,
+ com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+ throws java.io.IOException {
+ org.apache.orc.OrcProto.FileTail parsedMessage = null;
+ try {
+ parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+ } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+ parsedMessage = (org.apache.orc.OrcProto.FileTail) e.getUnfinishedMessage();
+ throw e;
+ } finally {
+ if (parsedMessage != null) {
+ mergeFrom(parsedMessage);
+ }
+ }
+ return this;
+ }
+ private int bitField0_;
+
+ // optional .orc.proto.PostScript postscript = 1;
+ private org.apache.orc.OrcProto.PostScript postscript_ = org.apache.orc.OrcProto.PostScript.getDefaultInstance();
+ private com.google.protobuf.SingleFieldBuilder<
+ org.apache.orc.OrcProto.PostScript, org.apache.orc.OrcProto.PostScript.Builder, org.apache.orc.OrcProto.PostScriptOrBuilder> postscriptBuilder_;
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public boolean hasPostscript() {
+ return ((bitField0_ & 0x00000001) == 0x00000001);
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public org.apache.orc.OrcProto.PostScript getPostscript() {
+ if (postscriptBuilder_ == null) {
+ return postscript_;
+ } else {
+ return postscriptBuilder_.getMessage();
+ }
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public Builder setPostscript(org.apache.orc.OrcProto.PostScript value) {
+ if (postscriptBuilder_ == null) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ postscript_ = value;
+ onChanged();
+ } else {
+ postscriptBuilder_.setMessage(value);
+ }
+ bitField0_ |= 0x00000001;
+ return this;
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public Builder setPostscript(
+ org.apache.orc.OrcProto.PostScript.Builder builderForValue) {
+ if (postscriptBuilder_ == null) {
+ postscript_ = builderForValue.build();
+ onChanged();
+ } else {
+ postscriptBuilder_.setMessage(builderForValue.build());
+ }
+ bitField0_ |= 0x00000001;
+ return this;
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public Builder mergePostscript(org.apache.orc.OrcProto.PostScript value) {
+ if (postscriptBuilder_ == null) {
+ if (((bitField0_ & 0x00000001) == 0x00000001) &&
+ postscript_ != org.apache.orc.OrcProto.PostScript.getDefaultInstance()) {
+ postscript_ =
+ org.apache.orc.OrcProto.PostScript.newBuilder(postscript_).mergeFrom(value).buildPartial();
+ } else {
+ postscript_ = value;
+ }
+ onChanged();
+ } else {
+ postscriptBuilder_.mergeFrom(value);
+ }
+ bitField0_ |= 0x00000001;
+ return this;
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public Builder clearPostscript() {
+ if (postscriptBuilder_ == null) {
+ postscript_ = org.apache.orc.OrcProto.PostScript.getDefaultInstance();
+ onChanged();
+ } else {
+ postscriptBuilder_.clear();
+ }
+ bitField0_ = (bitField0_ & ~0x00000001);
+ return this;
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public org.apache.orc.OrcProto.PostScript.Builder getPostscriptBuilder() {
+ bitField0_ |= 0x00000001;
+ onChanged();
+ return getPostscriptFieldBuilder().getBuilder();
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ public org.apache.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder() {
+ if (postscriptBuilder_ != null) {
+ return postscriptBuilder_.getMessageOrBuilder();
+ } else {
+ return postscript_;
+ }
+ }
+ /**
+ * optional .orc.proto.PostScript postscript = 1;
+ */
+ private com.google.protobuf.SingleFieldBuilder<
+ org.apache.orc.OrcProto.PostScript, org.apache.orc.OrcProto.PostScript.Builder, org.apache.orc.OrcProto.PostScriptOrBuilder>
+ getPostscriptFieldBuilder() {
+ if (postscriptBuilder_ == null) {
+ postscriptBuilder_ = new com.google.protobuf.SingleFieldBuilder<
+ org.apache.orc.OrcProto.PostScript, org.apache.orc.OrcProto.PostScript.Builder, org.apache.orc.OrcProto.PostScriptOrBuilder>(
+ postscript_,
+ getParentForChildren(),
+ isClean());
+ postscript_ = null;
+ }
+ return postscriptBuilder_;
+ }
+
+ // optional .orc.proto.Footer footer = 2;
+ private org.apache.orc.OrcProto.Footer footer_ = org.apache.orc.OrcProto.Footer.getDefaultInstance();
+ private com.google.protobuf.SingleFieldBuilder<
+ org.apache.orc.OrcProto.Footer, org.apache.orc.OrcProto.Footer.Builder, org.apache.orc.OrcProto.FooterOrBuilder> footerBuilder_;
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public boolean hasFooter() {
+ return ((bitField0_ & 0x00000002) == 0x00000002);
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public org.apache.orc.OrcProto.Footer getFooter() {
+ if (footerBuilder_ == null) {
+ return footer_;
+ } else {
+ return footerBuilder_.getMessage();
+ }
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public Builder setFooter(org.apache.orc.OrcProto.Footer value) {
+ if (footerBuilder_ == null) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ footer_ = value;
+ onChanged();
+ } else {
+ footerBuilder_.setMessage(value);
+ }
+ bitField0_ |= 0x00000002;
+ return this;
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public Builder setFooter(
+ org.apache.orc.OrcProto.Footer.Builder builderForValue) {
+ if (footerBuilder_ == null) {
+ footer_ = builderForValue.build();
+ onChanged();
+ } else {
+ footerBuilder_.setMessage(builderForValue.build());
+ }
+ bitField0_ |= 0x00000002;
+ return this;
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public Builder mergeFooter(org.apache.orc.OrcProto.Footer value) {
+ if (footerBuilder_ == null) {
+ if (((bitField0_ & 0x00000002) == 0x00000002) &&
+ footer_ != org.apache.orc.OrcProto.Footer.getDefaultInstance()) {
+ footer_ =
+ org.apache.orc.OrcProto.Footer.newBuilder(footer_).mergeFrom(value).buildPartial();
+ } else {
+ footer_ = value;
+ }
+ onChanged();
+ } else {
+ footerBuilder_.mergeFrom(value);
+ }
+ bitField0_ |= 0x00000002;
+ return this;
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public Builder clearFooter() {
+ if (footerBuilder_ == null) {
+ footer_ = org.apache.orc.OrcProto.Footer.getDefaultInstance();
+ onChanged();
+ } else {
+ footerBuilder_.clear();
+ }
+ bitField0_ = (bitField0_ & ~0x00000002);
+ return this;
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public org.apache.orc.OrcProto.Footer.Builder getFooterBuilder() {
+ bitField0_ |= 0x00000002;
+ onChanged();
+ return getFooterFieldBuilder().getBuilder();
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ public org.apache.orc.OrcProto.FooterOrBuilder getFooterOrBuilder() {
+ if (footerBuilder_ != null) {
+ return footerBuilder_.getMessageOrBuilder();
+ } else {
+ return footer_;
+ }
+ }
+ /**
+ * optional .orc.proto.Footer footer = 2;
+ */
+ private com.google.protobuf.SingleFieldBuilder<
+ org.apache.orc.OrcProto.Footer, org.apache.orc.OrcProto.Footer.Builder, org.apache.orc.OrcProto.FooterOrBuilder>
+ getFooterFieldBuilder() {
+ if (footerBuilder_ == null) {
+ footerBuilder_ = new com.google.protobuf.SingleFieldBuilder<
+ org.apache.orc.OrcProto.Footer, org.apache.orc.OrcProto.Footer.Builder, org.apache.orc.OrcProto.FooterOrBuilder>(
+ footer_,
+ getParentForChildren(),
+ isClean());
+ footer_ = null;
+ }
+ return footerBuilder_;
+ }
+
+ // optional uint64 fileLength = 3;
+ private long fileLength_ ;
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ public boolean hasFileLength() {
+ return ((bitField0_ & 0x00000004) == 0x00000004);
+ }
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ public long getFileLength() {
+ return fileLength_;
+ }
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ public Builder setFileLength(long value) {
+ bitField0_ |= 0x00000004;
+ fileLength_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * optional uint64 fileLength = 3;
+ */
+ public Builder clearFileLength() {
+ bitField0_ = (bitField0_ & ~0x00000004);
+ fileLength_ = 0L;
+ onChanged();
+ return this;
+ }
+
+ // optional uint64 postscriptLength = 4;
+ private long postscriptLength_ ;
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ public boolean hasPostscriptLength() {
+ return ((bitField0_ & 0x00000008) == 0x00000008);
+ }
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ public long getPostscriptLength() {
+ return postscriptLength_;
+ }
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ public Builder setPostscriptLength(long value) {
+ bitField0_ |= 0x00000008;
+ postscriptLength_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * optional uint64 postscriptLength = 4;
+ */
+ public Builder clearPostscriptLength() {
+ bitField0_ = (bitField0_ & ~0x00000008);
+ postscriptLength_ = 0L;
+ onChanged();
+ return this;
+ }
+
+ // @@protoc_insertion_point(builder_scope:orc.proto.FileTail)
+ }
+
+ static {
+ defaultInstance = new FileTail(true);
+ defaultInstance.initFields();
+ }
+
+ // @@protoc_insertion_point(class_scope:orc.proto.FileTail)
+ }
+
private static com.google.protobuf.Descriptors.Descriptor
internal_static_orc_proto_IntegerStatistics_descriptor;
private static
@@ -19055,6 +19924,11 @@ public Builder setMagicBytes(
private static
com.google.protobuf.GeneratedMessage.FieldAccessorTable
internal_static_orc_proto_PostScript_fieldAccessorTable;
+ private static com.google.protobuf.Descriptors.Descriptor
+ internal_static_orc_proto_FileTail_descriptor;
+ private static
+ com.google.protobuf.GeneratedMessage.FieldAccessorTable
+ internal_static_orc_proto_FileTail_fieldAccessorTable;
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
@@ -19135,9 +20009,13 @@ public Builder setMagicBytes(
"ression\030\002 \001(\0162\032.orc.proto.CompressionKin" +
"d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi",
"on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" +
- "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" +
- "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" +
- "APPY\020\002\022\007\n\003LZO\020\003B\020\n\016org.apache.orc"
+ "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t\"\206\001\n\010" +
+ "FileTail\022)\n\npostscript\030\001 \001(\0132\025.orc.proto" +
+ ".PostScript\022!\n\006footer\030\002 \001(\0132\021.orc.proto." +
+ "Footer\022\022\n\nfileLength\030\003 \001(\004\022\030\n\020postscript" +
+ "Length\030\004 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020" +
+ "\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\020\n\016org." +
+ "apache.orc"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -19282,6 +20160,12 @@ public Builder setMagicBytes(
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_orc_proto_PostScript_descriptor,
new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "MetadataLength", "WriterVersion", "Magic", });
+ internal_static_orc_proto_FileTail_descriptor =
+ getDescriptor().getMessageTypes().get(23);
+ internal_static_orc_proto_FileTail_fieldAccessorTable = new
+ com.google.protobuf.GeneratedMessage.FieldAccessorTable(
+ internal_static_orc_proto_FileTail_descriptor,
+ new java.lang.String[] { "Postscript", "Footer", "FileLength", "PostscriptLength", });
return null;
}
};
diff --git a/orc/src/java/org/apache/orc/FileMetaInfo.java b/orc/src/java/org/apache/orc/FileMetaInfo.java
deleted file mode 100644
index d3cac3b..0000000
--- a/orc/src/java/org/apache/orc/FileMetaInfo.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.nio.ByteBuffer;
-import java.util.List;
-
-/**
- * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
- * that is useful for Reader implementation
- *
- */
-public class FileMetaInfo {
- public ByteBuffer footerMetaAndPsBuffer;
- public final String compressionType;
- public final int bufferSize;
- public final int metadataSize;
- public final ByteBuffer footerBuffer;
- public final List<Integer> versionList;
- public final OrcFile.WriterVersion writerVersion;
-
-
- /** Ctor used when reading splits - no version list or full footer buffer. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
- this(compressionType, bufferSize, metadataSize, footerBuffer, null,
- writerVersion, null);
- }
-
- /** Ctor used when creating file info during init and when getting a new one. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, List<Integer> versionList,
- OrcFile.WriterVersion writerVersion,
- ByteBuffer fullFooterBuffer) {
- this.compressionType = compressionType;
- this.bufferSize = bufferSize;
- this.metadataSize = metadataSize;
- this.footerBuffer = footerBuffer;
- this.versionList = versionList;
- this.writerVersion = writerVersion;
- this.footerMetaAndPsBuffer = fullFooterBuffer;
- }
-
- public OrcFile.WriterVersion getWriterVersion() {
- return writerVersion;
- }
-
-}
\ No newline at end of file
diff --git a/orc/src/java/org/apache/orc/OrcFile.java b/orc/src/java/org/apache/orc/OrcFile.java
index 7dd7333..ddfa9f7 100644
--- a/orc/src/java/org/apache/orc/OrcFile.java
+++ b/orc/src/java/org/apache/orc/OrcFile.java
@@ -25,6 +25,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.impl.MemoryManager;
+import org.apache.orc.impl.OrcTail;
import org.apache.orc.impl.ReaderImpl;
import org.apache.orc.impl.WriterImpl;
@@ -160,19 +161,17 @@ protected OrcFile() {}
public static class ReaderOptions {
private final Configuration conf;
private FileSystem filesystem;
- private FileMetaInfo fileMetaInfo; // TODO: this comes from some place.
private long maxLength = Long.MAX_VALUE;
- private FileMetadata fullFileMetadata; // Propagate from LLAP cache.
+ private OrcTail orcTail;
+ // TODO: We can generalize the FileMetadata interface. Make OrcTail implement FileMetadata
+ // and remove this class altogether. Both footer caching and LLAP caching just need OrcTail.
+ // For now, keep this around to avoid complex surgery.
+ private FileMetadata fileMetadata;
public ReaderOptions(Configuration conf) {
this.conf = conf;
}
- public ReaderOptions fileMetaInfo(FileMetaInfo info) {
- fileMetaInfo = info;
- return this;
- }
-
public ReaderOptions filesystem(FileSystem fs) {
this.filesystem = fs;
return this;
@@ -183,8 +182,8 @@ public ReaderOptions maxLength(long val) {
return this;
}
- public ReaderOptions fileMetadata(FileMetadata metadata) {
- this.fullFileMetadata = metadata;
+ public ReaderOptions orcTail(OrcTail tail) {
+ this.orcTail = tail;
return this;
}
@@ -196,16 +195,21 @@ public FileSystem getFilesystem() {
return filesystem;
}
- public FileMetaInfo getFileMetaInfo() {
- return fileMetaInfo;
- }
-
public long getMaxLength() {
return maxLength;
}
+ public OrcTail getOrcTail() {
+ return orcTail;
+ }
+
+ public ReaderOptions fileMetadata(final FileMetadata metadata) {
+ fileMetadata = metadata;
+ return this;
+ }
+
public FileMetadata getFileMetadata() {
- return fullFileMetadata;
+ return fileMetadata;
}
}
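
The orcTail() option replaces the old fileMetaInfo() plumbing: a caller that already holds the tail (from a footer cache or a previous open) hands it back, and the reader skips the footer I/O entirely. A minimal sketch of that round trip, assuming the standard OrcFile.readerOptions() entry point; the TailReuse class and reopen() helper are illustrative names, not part of this patch:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.orc.OrcFile;
    import org.apache.orc.Reader;
    import org.apache.orc.impl.OrcTail;

    public class TailReuse {
      // Open the file once to read the tail from disk, then reopen it with the
      // cached tail so the second reader performs no footer reads.
      static Reader reopen(Configuration conf, Path path) throws IOException {
        Reader first = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        OrcTail tail = new OrcTail(first.getFileTail(), first.getSerializedFileFooter());
        return OrcFile.createReader(path, OrcFile.readerOptions(conf).orcTail(tail));
      }
    }
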
diff --git a/orc/src/java/org/apache/orc/OrcUtils.java b/orc/src/java/org/apache/orc/OrcUtils.java
index 5845ba6..dc83b9c 100644
--- a/orc/src/java/org/apache/orc/OrcUtils.java
+++ b/orc/src/java/org/apache/orc/OrcUtils.java
@@ -21,6 +21,8 @@
import java.util.Arrays;
import java.util.List;
+import org.apache.orc.impl.ReaderImpl;
+
import com.google.common.collect.Lists;
public class OrcUtils {
@@ -527,4 +529,13 @@ TypeDescription convertTypeFromProtobuf(List types,
}
throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
}
+
+ public static List<StripeInformation> convertProtoStripesToStripes(
+ List<OrcProto.StripeInformation> stripes) {
+ List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
+ for (OrcProto.StripeInformation info : stripes) {
+ result.add(new ReaderImpl.StripeInformationImpl(info));
+ }
+ return result;
+ }
}
diff --git a/orc/src/java/org/apache/orc/Reader.java b/orc/src/java/org/apache/orc/Reader.java
index 87f3293..c2d5235 100644
--- a/orc/src/java/org/apache/orc/Reader.java
+++ b/orc/src/java/org/apache/orc/Reader.java
@@ -138,6 +138,13 @@
OrcFile.WriterVersion getWriterVersion();
/**
+ * Get the file tail (footer + postscript).
+ *
+ * @return the file tail
+ */
+ OrcProto.FileTail getFileTail();
+
+ /**
* Options for creating a RecordReader.
*/
public static class Options {
@@ -354,7 +361,7 @@ public String toString() {
/**
* @return Stripe statistics.
*/
- List<StripeStatistics> getStripeStatistics();
+ List<StripeStatistics> getStripeStatistics() throws IOException;
/**
* @return File statistics, in original protobuf form.
diff --git a/orc/src/java/org/apache/orc/impl/OrcTail.java b/orc/src/java/org/apache/orc/impl/OrcTail.java
new file mode 100644
index 0000000..2671abf
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/OrcTail.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import static org.apache.orc.impl.ReaderImpl.extractMetadata;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcProto;
+import org.apache.orc.OrcUtils;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
+import org.apache.orc.TypeDescription;
+
+// TODO: Make OrcTail implement FileMetadata or Reader interface
+public final class OrcTail {
+ // postscript + footer, serialized into each OrcSplit
+ private final OrcProto.FileTail fileTail;
+ // serialized representation of metadata, footer and postscript
+ private final ByteBuffer serializedTail;
+ // used to invalidate cache entries
+ private final long fileModificationTime;
+ // lazily deserialized
+ private OrcProto.Metadata metadata;
+
+ public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) {
+ this(fileTail, serializedTail, -1);
+ }
+
+ public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail, long fileModificationTime) {
+ this.fileTail = fileTail;
+ this.serializedTail = serializedTail;
+ this.fileModificationTime = fileModificationTime;
+ this.metadata = null;
+ }
+
+ public ByteBuffer getSerializedTail() {
+ return serializedTail;
+ }
+
+ public long getFileModificationTime() {
+ return fileModificationTime;
+ }
+
+ public OrcProto.Footer getFooter() {
+ return fileTail.getFooter();
+ }
+
+ public OrcProto.PostScript getPostScript() {
+ return fileTail.getPostscript();
+ }
+
+ public OrcFile.WriterVersion getWriterVersion() {
+ OrcProto.PostScript ps = fileTail.getPostscript();
+ return (ps.hasWriterVersion()
+ ? OrcFile.WriterVersion.from(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+ }
+
+ public List<StripeInformation> getStripes() {
+ List<StripeInformation> result = new ArrayList<>();
+ for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) {
+ result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
+ }
+ return result;
+ }
+
+ public CompressionKind getCompressionKind() {
+ return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
+ }
+
+ public CompressionCodec getCompressionCodec() {
+ return WriterImpl.createCodec(getCompressionKind());
+ }
+
+ public int getCompressionBufferSize() {
+ return (int) fileTail.getPostscript().getCompressionBlockSize();
+ }
+
+ public List<StripeStatistics> getStripeStatistics() throws IOException {
+ List<StripeStatistics> result = new ArrayList<>();
+ List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
+ if (ssProto != null) {
+ for (OrcProto.StripeStatistics ss : ssProto) {
+ result.add(new StripeStatistics(ss.getColStatsList()));
+ }
+ }
+ return result;
+ }
+
+ public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
+ if (serializedTail == null) return null;
+ if (metadata == null) {
+ metadata = extractMetadata(serializedTail, 0,
+ (int) fileTail.getPostscript().getMetadataLength(),
+ getCompressionCodec(), getCompressionBufferSize());
+ }
+ return metadata.getStripeStatsList();
+ }
+
+ public int getMetadataSize() {
+ return (int) getPostScript().getMetadataLength();
+ }
+
+ public List<OrcProto.Type> getTypes() {
+ return getFooter().getTypesList();
+ }
+
+ public TypeDescription getTypeDescription() {
+ return OrcUtils.convertTypeFromProtobuf(getFooter().getTypesList(), 0);
+ }
+
+ public OrcProto.FileTail getFileTail() {
+ return fileTail;
+ }
+
+ public OrcProto.FileTail getMinimalFileTail() {
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
+ OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
+ footerBuilder.clearStatistics();
+ fileTailBuilder.setFooter(footerBuilder.build());
+ OrcProto.FileTail result = fileTailBuilder.build();
+ return result;
+ }
+}
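
OrcTail carries both the parsed FileTail and the raw bytes it came from, so stripe statistics can stay serialized until getStripeStatistics() is first called. getMinimalFileTail() serves the split path: it rebuilds the footer without per-column statistics so each OrcSplit stays small. A hedged illustration of the size difference; TailSizes and show() are made-up names:

    import org.apache.orc.OrcProto;
    import org.apache.orc.impl.OrcTail;

    public class TailSizes {
      // The full tail keeps the footer's column statistics; the minimal tail
      // drops them via Footer.Builder.clearStatistics() before rebuilding.
      static void show(OrcTail tail) {
        OrcProto.FileTail full = tail.getFileTail();
        OrcProto.FileTail minimal = tail.getMinimalFileTail();
        System.out.println("full tail: " + full.getSerializedSize() + " bytes, "
            + "minimal tail: " + minimal.getSerializedSize() + " bytes");
      }
    }
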
diff --git a/orc/src/java/org/apache/orc/impl/ReaderImpl.java b/orc/src/java/org/apache/orc/impl/ReaderImpl.java
index 1dd5e43..9ac39c7 100644
--- a/orc/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/orc/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -27,16 +27,16 @@
import java.util.List;
import java.util.Set;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.FileMetadata;
import org.apache.orc.OrcFile;
-import org.apache.orc.OrcUtils;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.CompressionCodec;
import org.apache.orc.FileFormatException;
-import org.apache.orc.FileMetaInfo;
-import org.apache.orc.FileMetadata;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;
import org.slf4j.Logger;
@@ -63,27 +63,25 @@
private final long maxLength;
protected final Path path;
protected final org.apache.orc.CompressionKind compressionKind;
- protected final CompressionCodec codec;
- protected final int bufferSize;
- private final List<OrcProto.StripeStatistics> stripeStats;
+ protected CompressionCodec codec;
+ protected int bufferSize;
+ protected OrcProto.Metadata metadata;
+ private List<OrcProto.StripeStatistics> stripeStats;
private final int metadataSize;
protected final List<OrcProto.Type> types;
- private final TypeDescription schema;
+ private TypeDescription schema;
private final List<OrcProto.UserMetadataItem> userMetadata;
private final List<OrcProto.ColumnStatistics> fileStats;
private final List<StripeInformation> stripes;
protected final int rowIndexStride;
private final long contentLength, numberOfRows;
-
private long deserializedSize = -1;
protected final Configuration conf;
private final List<Integer> versionList;
private final OrcFile.WriterVersion writerVersion;
- // Same for metastore cache - maintains the same background buffer, but includes postscript.
- // This will only be set if the file footer/metadata was read from disk.
- private final ByteBuffer footerMetaAndPsBuffer;
+ protected OrcTail tail;
public static class StripeInformationImpl
implements StripeInformation {
@@ -207,6 +205,11 @@ public long getContentLength() {
}
@Override
+ public OrcProto.FileTail getFileTail() {
+ return tail.getFileTail();
+ }
+
+ @Override
public int getRowIndexStride() {
return rowIndexStride;
}
@@ -261,6 +264,32 @@ protected static void ensureOrcFooter(FSDataInputStream in,
}
/**
+ * Ensure this is an ORC file to prevent users from trying to read text
+ * files or RC files as ORC files.
+ * @param psLen the postscript length
+ * @param buffer the tail of the file
+ * @throws IOException
+ */
+ protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException {
+ int magicLength = OrcFile.MAGIC.length();
+ int fullLength = magicLength + 1;
+ if (psLen < fullLength || buffer.remaining() < fullLength) {
+ throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+ }
+
+ int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
+ byte[] array = buffer.array();
+ // now look for the magic string at the end of the postscript.
+ if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
+ // if it isn't there, this may be 0.11.0 version of the ORC file.
+ // Read the first 3 bytes from the buffer to check for the header
+ if (!Text.decode(buffer.array(), 0, magicLength).equals(OrcFile.MAGIC)) {
+ throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+ }
+ }
+ }
+
+ /**
* Build a version string out of an array.
* @param version the version number as a list
* @return the human readable form of the version string
@@ -316,7 +345,6 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
this.path = path;
this.conf = options.getConfiguration();
this.maxLength = options.getMaxLength();
-
FileMetadata fileMetadata = options.getFileMetadata();
if (fileMetadata != null) {
this.compressionKind = fileMetadata.getCompressionKind();
@@ -334,40 +362,30 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
this.fileStats = fileMetadata.getFileStats();
this.stripes = fileMetadata.getStripes();
this.userMetadata = null; // not cached and not needed here
- this.footerMetaAndPsBuffer = null;
} else {
- FileMetaInfo footerMetaData;
- if (options.getFileMetaInfo() != null) {
- footerMetaData = options.getFileMetaInfo();
- this.footerMetaAndPsBuffer = null;
+ OrcTail orcTail = options.getOrcTail();
+ if (orcTail == null) {
+ tail = extractFileTail(fs, path, options.getMaxLength());
+ options.orcTail(tail);
} else {
- footerMetaData = extractMetaInfoFromFooter(fs, path,
- options.getMaxLength());
- this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
+ tail = orcTail;
}
- options.fileMetaInfo(footerMetaData);
- MetaInfoObjExtractor rInfo =
- new MetaInfoObjExtractor(footerMetaData.compressionType,
- footerMetaData.bufferSize,
- footerMetaData.metadataSize,
- footerMetaData.footerBuffer
- );
- this.compressionKind = rInfo.compressionKind;
- this.codec = rInfo.codec;
- this.bufferSize = rInfo.bufferSize;
- this.metadataSize = rInfo.metadataSize;
- this.stripeStats = rInfo.metadata.getStripeStatsList();
- this.types = rInfo.footer.getTypesList();
- this.rowIndexStride = rInfo.footer.getRowIndexStride();
- this.contentLength = rInfo.footer.getContentLength();
- this.numberOfRows = rInfo.footer.getNumberOfRows();
- this.userMetadata = rInfo.footer.getMetadataList();
- this.fileStats = rInfo.footer.getStatisticsList();
- this.versionList = footerMetaData.versionList;
- this.writerVersion = footerMetaData.writerVersion;
- this.stripes = convertProtoStripesToStripes(rInfo.footer.getStripesList());
- }
- this.schema = OrcUtils.convertTypeFromProtobuf(this.types, 0);
+ this.compressionKind = tail.getCompressionKind();
+ this.codec = tail.getCompressionCodec();
+ this.bufferSize = tail.getCompressionBufferSize();
+ this.metadataSize = tail.getMetadataSize();
+ this.versionList = tail.getPostScript().getVersionList();
+ this.types = tail.getFooter().getTypesList();
+ this.rowIndexStride = tail.getFooter().getRowIndexStride();
+ this.contentLength = tail.getFooter().getContentLength();
+ this.numberOfRows = tail.getFooter().getNumberOfRows();
+ this.userMetadata = tail.getFooter().getMetadataList();
+ this.fileStats = tail.getFooter().getStatisticsList();
+ this.writerVersion = tail.getWriterVersion();
+ this.stripes = tail.getStripes();
+ this.schema = tail.getTypeDescription();
+ this.stripeStats = tail.getStripeStatisticsProto();
+ }
}
/**
@@ -392,7 +410,7 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
Lists.newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
}
- private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+ public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(metadataAbsPos);
bb.limit(metadataAbsPos + metadataSize);
@@ -425,22 +443,47 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
return ps;
}
- private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
- Path path,
- long maxFileLength
- ) throws IOException {
+ public static OrcTail extractFileTail(ByteBuffer buffer, long fileLength) throws IOException {
+ int readSize = buffer.limit();
+ int psLen = buffer.get(readSize - 1) & 0xff;
+ int psOffset = readSize - 1 - psLen;
+ ensureOrcFooter(buffer, psLen);
+ byte[] psBuffer = new byte[psLen];
+ System.arraycopy(buffer.array(), psOffset, psBuffer, 0, psLen);
+ OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(psBuffer);
+ int footerSize = (int) ps.getFooterLength();
+ CompressionCodec codec = WriterImpl
+ .createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+ OrcProto.Footer footer = extractFooter(buffer,
+ (int) (buffer.position() + ps.getMetadataLength()),
+ footerSize, codec, (int) ps.getCompressionBlockSize());
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder()
+ .setPostscriptLength(psLen)
+ .setPostscript(ps)
+ .setFooter(footer)
+ .setFileLength(fileLength);
+ return new OrcTail(fileTailBuilder.build(), buffer.slice());
+ }
+
+ protected OrcTail extractFileTail(FileSystem fs, Path path,
+ long maxFileLength) throws IOException {
FSDataInputStream file = fs.open(path);
- ByteBuffer buffer = null, fullFooterBuffer = null;
- OrcProto.PostScript ps = null;
- OrcFile.WriterVersion writerVersion = null;
+ ByteBuffer buffer;
+ OrcProto.PostScript ps;
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
+ long modificationTime;
try {
// figure out the size of the file using the option or filesystem
long size;
if (maxFileLength == Long.MAX_VALUE) {
- size = fs.getFileStatus(path).getLen();
+ FileStatus fileStatus = fs.getFileStatus(path);
+ size = fileStatus.getLen();
+ modificationTime = fileStatus.getModificationTime();
} else {
size = maxFileLength;
+ modificationTime = -1;
}
+ fileTailBuilder.setFileLength(size);
//read last bytes into buffer to get PostScript
int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
@@ -456,10 +499,12 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
ensureOrcFooter(file, path, psLen, buffer);
int psOffset = readSize - 1 - psLen;
ps = extractPostScript(buffer, path, psLen, psOffset);
+ bufferSize = (int) ps.getCompressionBlockSize();
+ codec = WriterImpl.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+ fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);
int footerSize = (int) ps.getFooterLength();
int metadataSize = (int) ps.getMetadataLength();
- writerVersion = extractWriterVersion(ps);
//check if extra bytes need to be read
int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
@@ -473,17 +518,23 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
extraBuf.put(buffer);
buffer = extraBuf;
buffer.position(0);
- fullFooterBuffer = buffer.slice();
buffer.limit(footerSize + metadataSize);
+ readSize += extra;
+ psOffset = readSize - 1 - psLen;
} else {
//footer is already in the bytes in buffer, just adjust position, length
buffer.position(psOffset - footerSize - metadataSize);
- fullFooterBuffer = buffer.slice();
buffer.limit(psOffset);
}
- // remember position for later TODO: what later? this comment is useless
buffer.mark();
+ int footerOffset = psOffset - footerSize;
+ buffer.position(footerOffset);
+ ByteBuffer footerBuffer = buffer.slice();
+ buffer.reset();
+ OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize,
+ codec, bufferSize);
+ fileTailBuilder.setFooter(footer);
} finally {
try {
file.close();
@@ -492,68 +543,12 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
}
}
- return new FileMetaInfo(
- ps.getCompression().toString(),
- (int) ps.getCompressionBlockSize(),
- (int) ps.getMetadataLength(),
- buffer,
- ps.getVersionList(),
- writerVersion,
- fullFooterBuffer
- );
- }
-
- protected static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps) {
- return (ps.hasWriterVersion()
- ? getWriterVersion(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
- }
-
- protected static List<StripeInformation> convertProtoStripesToStripes(
- List<OrcProto.StripeInformation> stripes) {
- List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
- for (OrcProto.StripeInformation info : stripes) {
- result.add(new StripeInformationImpl(info));
- }
- return result;
- }
-
- /**
- * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
- * from serialized fields.
- * As the fields are final, the fields need to be initialized in the constructor and
- * can't be done in some helper function. So this helper class is used instead.
- *
- */
- private static class MetaInfoObjExtractor{
- final org.apache.orc.CompressionKind compressionKind;
- final CompressionCodec codec;
- final int bufferSize;
- final int metadataSize;
- final OrcProto.Metadata metadata;
- final OrcProto.Footer footer;
-
- MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer) throws IOException {
-
- this.compressionKind = org.apache.orc.CompressionKind.valueOf(codecStr.toUpperCase());
- this.bufferSize = bufferSize;
- this.codec = WriterImpl.createCodec(compressionKind);
- this.metadataSize = metadataSize;
-
- int position = footerBuffer.position();
- int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
-
- this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
- this.footer = extractFooter(
- footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);
-
- footerBuffer.position(position);
- }
+ return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime);
}
@Override
public ByteBuffer getSerializedFileFooter() {
- return footerMetaAndPsBuffer;
+ return tail.getSerializedTail();
}
@Override
@@ -722,7 +717,11 @@ private int getLastIdx() {
}
@Override
- public List<StripeStatistics> getStripeStatistics() {
+ public List<StripeStatistics> getStripeStatistics() throws IOException {
+ if (stripeStats == null && metadata == null) {
+ metadata = extractMetadata(tail.getSerializedTail(), 0, metadataSize, codec, bufferSize);
+ stripeStats = metadata.getStripeStatsList();
+ }
List<StripeStatistics> result = new ArrayList<>();
for (OrcProto.StripeStatistics ss : stripeStats) {
result.add(new StripeStatistics(ss.getColStatsList()));
diff --git a/orc/src/protobuf/orc_proto.proto b/orc/src/protobuf/orc_proto.proto
index f4935b4..3b2cf1b 100644
--- a/orc/src/protobuf/orc_proto.proto
+++ b/orc/src/protobuf/orc_proto.proto
@@ -220,3 +220,11 @@ message PostScript {
// Leave this last in the record
optional string magic = 8000;
}
+
+// The contents of the file tail that must be serialized.
+message FileTail {
+ optional PostScript postscript = 1;
+ optional Footer footer = 2;
+ optional uint64 fileLength = 3;
+ optional uint64 postscriptLength = 4;
+}
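
All four FileTail fields are optional, so a partially populated message still round-trips cleanly; ExternalCache relies on that flexibility when it rebuilds an OrcTail from cached bytes without a parsed FileTail. A small sketch of building the new message through the generated Java API and round-tripping it through its serialized form, with made-up values (a real tail is filled in by ReaderImpl.extractFileTail()):

    import org.apache.orc.OrcProto;

    public class FileTailRoundTrip {
      public static void main(String[] args) throws Exception {
        OrcProto.FileTail tail = OrcProto.FileTail.newBuilder()
            .setPostscript(OrcProto.PostScript.newBuilder()
                .setFooterLength(120)
                .setCompression(OrcProto.CompressionKind.ZLIB))
            .setFileLength(4096)
            .setPostscriptLength(23)
            .build();
        byte[] wire = tail.toByteArray();
        OrcProto.FileTail parsed = OrcProto.FileTail.parseFrom(wire);
        // footer was never set, and hasFooter() reports that after parsing
        System.out.println("hasFooter = " + parsed.hasFooter());
      }
    }
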
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java
index 6556fbf..2fbe1d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java
@@ -28,7 +28,6 @@
import java.util.Map;
import java.util.Map.Entry;
-import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -36,7 +35,6 @@
import org.apache.hadoop.hive.metastore.api.MetadataPpdResult;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.io.HdfsUtils;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FileInfo;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FooterCache;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
@@ -44,7 +42,7 @@
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId;
-import org.apache.orc.FileMetaInfo;
+import org.apache.orc.impl.OrcTail;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -74,13 +72,12 @@ public ExternalCache(LocalCache lc, ExternalFooterCachesByConf efcf) {
}
@Override
- public void put(Long fileId, FileStatus file, FileMetaInfo fileMetaInfo, Reader orcReader)
- throws IOException {
- localCache.put(fileId, file, fileMetaInfo, orcReader);
- if (fileId != null) {
+ public void put(OrcInputFormat.FooterCacheKey key, OrcTail orcTail) throws IOException {
+ localCache.put(key.getPath(), orcTail);
+ if (key.getFileId() != null) {
try {
- externalCacheSrc.getCache(conf).putFileMetadata(Lists.newArrayList(fileId),
- Lists.newArrayList(((ReaderImpl)orcReader).getSerializedFileFooter()));
+ externalCacheSrc.getCache(conf).putFileMetadata(Lists.newArrayList(key.getFileId()),
+ Lists.newArrayList(orcTail.getSerializedTail()));
} catch (HiveException e) {
throw new IOException(e);
}
@@ -108,7 +105,7 @@ public void configure(HiveConf queryConfig) {
@Override
public void getAndValidate(List<HdfsFileStatusWithId> files, boolean isOriginal,
- FileInfo[] result, ByteBuffer[] ppdResult) throws IOException, HiveException {
+ OrcTail[] result, ByteBuffer[] ppdResult) throws IOException, HiveException {
assert result.length == files.size();
assert ppdResult == null || ppdResult.length == files.size();
// First, check the local cache.
@@ -155,7 +152,7 @@ public void getAndValidate(List files, boolean isOriginal,
}
private int getAndVerifyIndex(HashMap<Long, Integer> posMap,
- List<HdfsFileStatusWithId> files, FileInfo[] result, Long fileId) {
+ List<HdfsFileStatusWithId> files, OrcTail[] result, Long fileId) {
int ix = posMap.get(fileId);
assert result[ix] == null;
assert fileId != null && fileId.equals(files.get(ix).getFileId());
@@ -163,9 +160,9 @@ private int getAndVerifyIndex(HashMap posMap,
}
private boolean processBbResult(
- ByteBuffer bb, int ix, HdfsFileStatusWithId file, FileInfo[] result) throws IOException {
+ ByteBuffer bb, int ix, HdfsFileStatusWithId file, OrcTail[] result) throws IOException {
if (bb == null) return true;
- result[ix] = createFileInfoFromMs(file, bb);
+ result[ix] = createOrcTailFromMs(file, bb);
if (result[ix] == null) {
return false;
}
@@ -175,12 +172,12 @@ private boolean processBbResult(
}
private void processPpdResult(MetadataPpdResult mpr, HdfsFileStatusWithId file,
- int ix, FileInfo[] result, ByteBuffer[] ppdResult) throws IOException {
+ int ix, OrcTail[] result, ByteBuffer[] ppdResult) throws IOException {
if (mpr == null) return; // This file is unknown to metastore.
ppdResult[ix] = mpr.isSetIncludeBitset() ? mpr.bufferForIncludeBitset() : NO_SPLIT_AFTER_PPD;
if (mpr.isSetMetadata()) {
- result[ix] = createFileInfoFromMs(file, mpr.bufferForMetadata());
+ result[ix] = createOrcTailFromMs(file, mpr.bufferForMetadata());
if (result[ix] != null) {
localCache.put(file.getFileStatus().getPath(), result[ix]);
}
@@ -188,7 +185,7 @@ private void processPpdResult(MetadataPpdResult mpr, HdfsFileStatusWithId file,
}
private List<Long> determineFileIdsToQuery(
- List<HdfsFileStatusWithId> files, FileInfo[] result, HashMap<Long, Integer> posMap) {
+ List<HdfsFileStatusWithId> files, OrcTail[] result, HashMap<Long, Integer> posMap) {
for (int i = 0; i < result.length; ++i) {
if (result[i] != null) continue;
HdfsFileStatusWithId file = files.get(i);
@@ -293,25 +290,12 @@ public static void translateSargToTableColIndexes(
}
}
- private static FileInfo createFileInfoFromMs(
+ private static OrcTail createOrcTailFromMs(
HdfsFileStatusWithId file, ByteBuffer bb) throws IOException {
if (bb == null) return null;
FileStatus fs = file.getFileStatus();
- ReaderImpl.FooterInfo fi = null;
ByteBuffer copy = bb.duplicate();
- try {
- fi = ReaderImpl.extractMetaInfoFromFooter(copy, fs.getPath());
- } catch (Exception ex) {
- byte[] data = new byte[bb.remaining()];
- System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), data, 0, data.length);
- String msg = "Failed to parse the footer stored in cache for file ID "
- + file.getFileId() + " " + bb + " [ " + Hex.encodeHexString(data) + " ]";
- LOG.error(msg, ex);
- return null;
- }
- return new FileInfo(fs.getModificationTime(), fs.getLen(), fi.getStripes(), fi.getMetadata(),
- fi.getFooter().getTypesList(), fi.getFooter().getStatisticsList(), fi.getFileMetaInfo(),
- fi.getFileMetaInfo().writerVersion, file.getFileId());
+ return new OrcTail(null, copy, fs.getModificationTime());
}
private static final class Baos extends ByteArrayOutputStream {
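
A minimal sketch of the rebuild step above, assuming only the three-argument OrcTail constructor that createOrcTailFromMs itself uses; passing a null FileTail defers parsing to the serialized buffer, and the buffer is duplicated so the cached copy's position and limit survive:

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.orc.impl.OrcTail;

    final class TailFromMetastoreSketch {
      // Rebuild an OrcTail from serialized tail bytes fetched from the
      // metastore cache; only the bytes cross the metastore boundary.
      static OrcTail rebuild(FileStatus file, ByteBuffer bb) throws IOException {
        if (bb == null) return null;
        return new OrcTail(null, bb.duplicate(), file.getModificationTime());
      }
    }
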
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/LocalCache.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/LocalCache.java
index 8151e52..fe93656 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/LocalCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/LocalCache.java
@@ -24,29 +24,23 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FileInfo;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FooterCache;
-import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId;
-import org.apache.orc.FileMetaInfo;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.orc.impl.OrcTail;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
-/** Local footer cache using Guava. Stores convoluted Java objects. */
-class LocalCache implements FooterCache {
+class LocalCache implements OrcInputFormat.FooterCache {
private static final Logger LOG = LoggerFactory.getLogger(LocalCache.class);
- private static boolean isDebugEnabled = LOG.isDebugEnabled();
+ private final Cache<Path, OrcTail> cache;
- private final Cache<Path, FileInfo> cache;
-
- public LocalCache(int numThreads, int cacheStripeDetailsSize) {
+ LocalCache(int numThreads, int cacheStripeDetailsSize) {
cache = CacheBuilder.newBuilder()
.concurrencyLevel(numThreads)
- .initialCapacity(cacheStripeDetailsSize)
.maximumSize(cacheStripeDetailsSize)
- .softValues()
.build();
}
@@ -55,49 +49,50 @@ public void clear() {
cache.cleanUp();
}
- public void getAndValidate(List<HdfsFileStatusWithId> files, boolean isOriginal,
- FileInfo[] result, ByteBuffer[] ppdResult) throws IOException {
+ public void put(Path path, OrcTail tail) {
+ cache.put(path, tail);
+ }
+
+ public OrcTail get(Path path) {
+ return cache.getIfPresent(path);
+ }
+
+ @Override
+ public void getAndValidate(final List<HadoopShims.HdfsFileStatusWithId> files,
+ final boolean isOriginal,
+ final OrcTail[] result, final ByteBuffer[] ppdResult)
+ throws IOException, HiveException {
// TODO: should local cache also be by fileId? Preserve the original logic for now.
assert result.length == files.size();
int i = -1;
- for (HdfsFileStatusWithId fileWithId : files) {
+ for (HadoopShims.HdfsFileStatusWithId fileWithId : files) {
++i;
FileStatus file = fileWithId.getFileStatus();
Path path = file.getPath();
- Long fileId = fileWithId.getFileId();
- FileInfo fileInfo = cache.getIfPresent(path);
- if (isDebugEnabled) {
- LOG.debug("Info " + (fileInfo == null ? "not " : "") + "cached for path: " + path);
+ OrcTail tail = cache.getIfPresent(path);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Serialized tail " + (tail == null ? "not " : "") + "cached for path: " + path);
}
- if (fileInfo == null) continue;
- if ((fileId != null && fileInfo.fileId != null && fileId == fileInfo.fileId)
- || (fileInfo.modificationTime == file.getModificationTime() &&
- fileInfo.size == file.getLen())) {
- result[i] = fileInfo;
+ if (tail == null) continue;
+ if (tail != null && file.getLen() == tail.getFileTail().getFileLength()
+ && file.getModificationTime() == tail.getFileModificationTime()) {
+ result[i] = tail;
continue;
}
// Invalidate
cache.invalidate(path);
- if (isDebugEnabled) {
+ if (LOG.isDebugEnabled()) {
LOG.debug("Meta-Info for : " + path + " changed. CachedModificationTime: "
- + fileInfo.modificationTime + ", CurrentModificationTime: "
- + file.getModificationTime() + ", CachedLength: " + fileInfo.size
+ + tail.getFileModificationTime() + ", CurrentModificationTime: "
+ + file.getModificationTime() + ", CachedLength: " + tail.getFileTail().getFileLength()
+ ", CurrentLength: " + file.getLen());
}
}
}
- public void put(Path path, FileInfo fileInfo) {
- cache.put(path, fileInfo);
- }
-
@Override
- public void put(Long fileId, FileStatus file, FileMetaInfo fileMetaInfo, Reader orcReader)
- throws IOException {
- cache.put(file.getPath(), new FileInfo(file.getModificationTime(), file.getLen(),
- orcReader.getStripes(), orcReader.getStripeStatistics(), orcReader.getTypes(),
- orcReader.getOrcProtoFileStatistics(), fileMetaInfo, orcReader.getWriterVersion(),
- fileId));
+ public boolean hasPpd() {
+ return false;
}
@Override
@@ -106,7 +101,8 @@ public boolean isBlocking() {
}
@Override
- public boolean hasPpd() {
- return false;
+ public void put(final OrcInputFormat.FooterCacheKey cacheKey, final OrcTail orcTail)
+ throws IOException {
+ put(cacheKey.getPath(), orcTail);
}
}
\ No newline at end of file
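
The hit-validation rule in the hunk above, distilled into a standalone sketch that assumes only the OrcTail accessors appearing in this patch:

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.orc.impl.OrcTail;

    final class TailValiditySketch {
      // A cached tail is reused only while both the file length and the
      // modification time still match the live FileStatus; otherwise the
      // caller invalidates the entry, as getAndValidate() does above.
      static boolean isValid(OrcTail tail, FileStatus file) {
        return tail != null
            && file.getLen() == tail.getFileTail().getFileLength()
            && file.getModificationTime() == tail.getFileModificationTime();
      }
    }
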
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index 58e5da1..5366020 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -30,6 +30,7 @@
import org.apache.orc.FileMetadata;
import org.apache.orc.impl.MemoryManager;
import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.OrcTail;
/**
* Contains factory methods to read or write ORC files.
@@ -72,6 +73,11 @@ public ReaderOptions fileMetadata(FileMetadata metadata) {
super.fileMetadata(metadata);
return this;
}
+
+ public ReaderOptions orcTail(OrcTail orcTail) {
+ super.orcTail(orcTail);
+ return this;
+ }
}
public static ReaderOptions readerOptions(Configuration conf) {
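
A hedged usage sketch of the new option, combining it with maxLength() the way the record readers later in this patch do; it assumes the sketch sits in the same org.apache.hadoop.hive.ql.io.orc package so Reader, OrcFile and OrcSplit resolve without imports:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;

    final class ReaderFromSplitSketch {
      // Open a reader for a split that may already carry its tail: maxLength
      // bounds the tail search, and orcTail skips re-reading the footer.
      static Reader open(Configuration conf, OrcSplit split) throws IOException {
        OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf)
            .maxLength(split.getFileLength());
        if (split.hasFooter()) {
          opts.orcTail(split.getOrcTail());
        }
        return OrcFile.createReader(split.getPath(), opts);
      }
    }
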
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java
index c9c7b5a..52b60db 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java
@@ -29,6 +29,8 @@
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.orc.OrcProto;
import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
+import org.apache.orc.impl.OrcTail;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -41,15 +43,16 @@ public SplitInfos applySargToMetadata(
SearchArgument sarg, ByteBuffer fileMetadata) throws IOException {
// TODO: ideally we should store shortened representation of only the necessary fields
// in HBase; it will probably require custom SARG application code.
- ReaderImpl.FooterInfo fi = ReaderImpl.extractMetaInfoFromFooter(fileMetadata, null);
- OrcProto.Footer footer = fi.getFooter();
+ // FIXME: how do we get file length here?
+ OrcTail orcTail = ReaderImpl.extractFileTail(fileMetadata, -1);
+ OrcProto.Footer footer = orcTail.getFooter();
int stripeCount = footer.getStripesCount();
boolean[] result = OrcInputFormat.pickStripesViaTranslatedSarg(
- sarg, fi.getFileMetaInfo().getWriterVersion(),
- footer.getTypesList(), fi.getMetadata(), stripeCount);
+ sarg, orcTail.getWriterVersion(),
+ footer.getTypesList(), orcTail.getStripeStatistics(), stripeCount);
// For ORC case, send the boundaries of the stripes so we don't have to send the footer.
SplitInfos.Builder sb = SplitInfos.newBuilder();
- List<StripeInformation> stripes = fi.getStripes();
+ List<StripeInformation> stripes = orcTail.getStripes();
boolean isEliminated = true;
for (int i = 0; i < result.length; ++i) {
if (result != null && !result[i]) continue;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index d7a8c2f..c659487 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -53,11 +53,11 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.orc.ColumnStatistics;
-import org.apache.orc.FileMetaInfo;
import org.apache.orc.OrcUtils;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;
import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.OrcTail;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -648,20 +648,20 @@ public AcidDirInfo(FileSystem fs, Path splitPath, Directory acidInfo,
private final Context context;
private final FileSystem fs;
private final HdfsFileStatusWithId fileWithId;
- private final FileInfo fileInfo;
+ private final OrcTail orcTail;
private final boolean isOriginal;
private final List<AcidInputFormat.DeltaMetaData> deltas;
private final boolean hasBase;
private final ByteBuffer ppdResult;
- SplitInfo(Context context, FileSystem fs, HdfsFileStatusWithId fileWithId, FileInfo fileInfo,
+ SplitInfo(Context context, FileSystem fs, HdfsFileStatusWithId fileWithId, OrcTail orcTail,
boolean isOriginal, List<AcidInputFormat.DeltaMetaData> deltas, boolean hasBase, Path dir,
boolean[] covered, ByteBuffer ppdResult) throws IOException {
super(dir, context.numBuckets, deltas, covered);
this.context = context;
this.fs = fs;
this.fileWithId = fileWithId;
- this.fileInfo = fileInfo;
+ this.orcTail = orcTail;
this.isOriginal = isOriginal;
this.deltas = deltas;
this.hasBase = hasBase;
@@ -669,11 +669,11 @@ public AcidDirInfo(FileSystem fs, Path splitPath, Directory acidInfo,
}
@VisibleForTesting
- public SplitInfo(Context context, FileSystem fs, FileStatus fileStatus, FileInfo fileInfo,
+ public SplitInfo(Context context, FileSystem fs, FileStatus fileStatus, OrcTail orcTail,
boolean isOriginal, ArrayList<AcidInputFormat.DeltaMetaData> deltas, boolean hasBase, Path dir,
boolean[] covered) throws IOException {
this(context, fs, AcidUtils.createOriginalObj(null, fileStatus),
- fileInfo, isOriginal, deltas, hasBase, dir, covered, null);
+ orcTail, isOriginal, deltas, hasBase, dir, covered, null);
}
}
@@ -728,13 +728,13 @@ public ETLSplitStrategy(Context context, FileSystem fs, Path dir,
FooterCache cache = context.cacheStripeDetails ? ((deltas == null || deltas.isEmpty())
? context.footerCache : Context.localCache) : null;
if (cache != null) {
- FileInfo[] infos = new FileInfo[files.size()];
+ OrcTail[] orcTails = new OrcTail[files.size()];
ByteBuffer[] ppdResults = null;
if (cache.hasPpd()) {
ppdResults = new ByteBuffer[files.size()];
}
try {
- cache.getAndValidate(files, isOriginal, infos, ppdResults);
+ cache.getAndValidate(files, isOriginal, orcTails, ppdResults);
} catch (HiveException e) {
throw new IOException(e);
}
@@ -745,16 +745,16 @@ public ETLSplitStrategy(Context context, FileSystem fs, Path dir,
dir = dirs.get(++dirIx);
filesInDirCount = dir.fileCount;
}
- FileInfo info = infos[i];
+ OrcTail orcTail = orcTails[i];
ByteBuffer ppdResult = ppdResults == null ? null : ppdResults[i];
HdfsFileStatusWithId file = files.get(i);
- if (info != null) {
+ if (orcTail != null) {
// Cached copy is valid
context.cacheHitCounter.incrementAndGet();
}
// Ignore files eliminated by PPD, or of 0 length.
if (ppdResult != FooterCache.NO_SPLIT_AFTER_PPD && file.getFileStatus().getLen() > 0) {
- result.add(new SplitInfo(context, dir.fs, file, info,
+ result.add(new SplitInfo(context, dir.fs, file, orcTail,
isOriginal, deltas, true, dir.dir, covered, ppdResult));
}
}
@@ -921,7 +921,7 @@ public BISplitStrategy(Context context, FileSystem fs,
for (Map.Entry<Long, BlockLocation> entry : blockOffsets.entrySet()) {
OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), fileKey, entry.getKey(),
entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true,
- deltas, -1);
+ deltas, -1, fileStatus.getLen());
splits.add(orcSplit);
}
}
@@ -963,7 +963,7 @@ public ACIDSplitStrategy(Path dir, int numBuckets, List<AcidInputFormat.DeltaMetaData> deltas, b
if (!deltas.isEmpty()) {
for (int b = 0; b < numBuckets; ++b) {
if (!covered[b]) {
- splits.add(new OrcSplit(dir, null, b, 0, new String[0], null, false, false, deltas, -1));
+ splits.add(new OrcSplit(dir, null, b, 0, new String[0], null, false, false, deltas, -1, -1));
}
}
}
@@ -1054,9 +1054,8 @@ private AcidDirInfo callInternal() throws IOException {
private final Long fsFileId;
private final long blockSize;
private final TreeMap<Long, BlockLocation> locations;
- private final FileInfo fileInfo;
+ private OrcTail orcTail;
private List<StripeInformation> stripes;
- private FileMetaInfo fileMetaInfo;
private List<StripeStatistics> stripeStats;
private List<OrcProto.Type> types;
private boolean[] includedCols;
@@ -1078,7 +1077,7 @@ public SplitGenerator(SplitInfo splitInfo, UserGroupInformation ugi,
this.file = splitInfo.fileWithId.getFileStatus();
this.fsFileId = splitInfo.fileWithId.getFileId();
this.blockSize = this.file.getBlockSize();
- this.fileInfo = splitInfo.fileInfo;
+ this.orcTail = splitInfo.orcTail;
// TODO: potential DFS call
this.locations = SHIMS.getLocationsWithOffset(fs, file);
this.isOriginal = splitInfo.isOriginal;
@@ -1129,11 +1128,10 @@ static long getOverlap(long offset1, long length1,
* are written with large block sizes.
* @param offset the start of the split
* @param length the length of the split
- * @param fileMetaInfo file metadata from footer and postscript
+ * @param orcTail orc tail
* @throws IOException
*/
- OrcSplit createSplit(long offset, long length,
- FileMetaInfo fileMetaInfo) throws IOException {
+ OrcSplit createSplit(long offset, long length, OrcTail orcTail) throws IOException {
String[] hosts;
Map.Entry<Long, BlockLocation> startEntry = locations.floorEntry(offset);
BlockLocation start = startEntry.getValue();
@@ -1196,7 +1194,7 @@ OrcSplit createSplit(long offset, long length,
fileKey = new SyntheticFileId(file);
}
return new OrcSplit(file.getPath(), fileKey, offset, length, hosts,
- fileMetaInfo, isOriginal, hasBase, deltas, scaledProjSize);
+ orcTail, isOriginal, hasBase, deltas, scaledProjSize, fileLen);
}
private static final class OffsetAndLength { // Java cruft; pair of long.
@@ -1271,7 +1269,7 @@ public String toString() {
int index = si.getIndex();
if (lastIdx >= 0 && lastIdx + 1 != index && current.offset != -1) {
// Create split for the previous unfinished stripe.
- splits.add(createSplit(current.offset, current.length, null));
+ splits.add(createSplit(current.offset, current.length, orcTail));
current.offset = -1;
}
lastIdx = index;
@@ -1305,16 +1303,16 @@ public String toString() {
if (!includeStripe[idx]) {
// create split for the previous unfinished stripe
if (current.offset != -1) {
- splits.add(createSplit(current.offset, current.length, fileMetaInfo));
+ splits.add(createSplit(current.offset, current.length, orcTail));
current.offset = -1;
}
continue;
}
current = generateOrUpdateSplit(
- splits, current, stripe.getOffset(), stripe.getLength(), fileMetaInfo);
+ splits, current, stripe.getOffset(), stripe.getLength(), orcTail);
}
- generateLastSplit(splits, current, fileMetaInfo);
+ generateLastSplit(splits, current, orcTail);
// Add uncovered ACID delta splits.
splits.addAll(deltaSplits);
@@ -1323,12 +1321,12 @@ public String toString() {
private OffsetAndLength generateOrUpdateSplit(
List<OrcSplit> splits, OffsetAndLength current, long offset,
- long length, FileMetaInfo fileMetaInfo) throws IOException {
+ long length, OrcTail orcTail) throws IOException {
// if we are working on a stripe, over the min stripe size, and
// crossed a block boundary, cut the input split here.
if (current.offset != -1 && current.length > context.minSize &&
(current.offset / blockSize != offset / blockSize)) {
- splits.add(createSplit(current.offset, current.length, fileMetaInfo));
+ splits.add(createSplit(current.offset, current.length, orcTail));
current.offset = -1;
}
// if we aren't building a split, start a new one.
@@ -1339,59 +1337,41 @@ private OffsetAndLength generateOrUpdateSplit(
current.length = (offset + length) - current.offset;
}
if (current.length >= context.maxSize) {
- splits.add(createSplit(current.offset, current.length, fileMetaInfo));
+ splits.add(createSplit(current.offset, current.length, orcTail));
current.offset = -1;
}
return current;
}
private void generateLastSplit(List<OrcSplit> splits, OffsetAndLength current,
- FileMetaInfo fileMetaInfo) throws IOException {
+ OrcTail orcTail) throws IOException {
if (current.offset == -1) return;
- splits.add(createSplit(current.offset, current.length, fileMetaInfo));
+ splits.add(createSplit(current.offset, current.length, orcTail));
}
private void populateAndCacheStripeDetails() throws IOException {
- // Only create OrcReader if we are missing some information.
- List<OrcProto.ColumnStatistics> colStatsLocal;
- List<OrcProto.Type> typesLocal;
- if (fileInfo != null) {
- stripes = fileInfo.stripeInfos;
- stripeStats = fileInfo.stripeStats;
- fileMetaInfo = fileInfo.fileMetaInfo;
- typesLocal = types = fileInfo.types;
- colStatsLocal = fileInfo.fileStats;
- writerVersion = fileInfo.writerVersion;
- // For multiple runs, in case sendSplitsInFooter changes
- if (fileMetaInfo == null && context.footerInSplits) {
- Reader orcReader = createOrcReader();
- fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo();
- assert fileInfo.stripeStats != null && fileInfo.types != null
- && fileInfo.writerVersion != null;
- // We assume that if we needed to create a reader, we need to cache it to meta cache.
- // This will also needlessly overwrite it in local cache for now.
- context.footerCache.put(fsFileId, file, fileInfo.fileMetaInfo, orcReader);
- }
- } else {
- Reader orcReader = createOrcReader();
- stripes = orcReader.getStripes();
- typesLocal = types = orcReader.getTypes();
- colStatsLocal = orcReader.getOrcProtoFileStatistics();
- writerVersion = orcReader.getWriterVersion();
- stripeStats = orcReader.getStripeStatistics();
- fileMetaInfo = context.footerInSplits ?
- ((ReaderImpl) orcReader).getFileMetaInfo() : null;
+ if (orcTail == null) {
+ Reader orcReader = OrcFile.createReader(file.getPath(),
+ OrcFile.readerOptions(context.conf)
+ .filesystem(fs)
+ .maxLength(file.getLen()));
+ orcTail = new OrcTail(orcReader.getFileTail(), orcReader.getSerializedFileFooter(),
+ file.getModificationTime());
if (context.cacheStripeDetails) {
- context.footerCache.put(fsFileId, file, fileMetaInfo, orcReader);
+ context.footerCache.put(new FooterCacheKey(fsFileId, file.getPath()), orcTail);
}
}
+ stripes = orcTail.getStripes();
+ stripeStats = orcTail.getStripeStatistics();
+ types = orcTail.getTypes();
+ writerVersion = orcTail.getWriterVersion();
includedCols = genIncludedColumns(types, context.conf, isOriginal);
- projColsUncompressedSize = computeProjectionSize(typesLocal, colStatsLocal, includedCols, isOriginal);
- }
-
- private Reader createOrcReader() throws IOException {
- return OrcFile.createReader(file.getPath(),
- OrcFile.readerOptions(context.conf).filesystem(fs).maxLength(file.getLen()));
+ List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList();
+ projColsUncompressedSize = computeProjectionSize(types, fileColStats, includedCols,
+ isOriginal);
+ if (!context.footerInSplits) {
+ orcTail = null;
+ }
}
private long computeProjectionSize(List<OrcProto.Type> types,
@@ -1595,40 +1575,6 @@ private static void scheduleSplits(ETLSplitStrategy splitStrategy, Context conte
return result.toArray(new InputSplit[result.size()]);
}
- /**
- * FileInfo.
- *
- * Stores information relevant to split generation for an ORC File.
- *
- */
- static class FileInfo {
- final long modificationTime;
- final long size;
- final Long fileId;
- private final List<StripeInformation> stripeInfos;
- private FileMetaInfo fileMetaInfo;
- private final List<StripeStatistics> stripeStats;
- private final List<OrcProto.ColumnStatistics> fileStats;
- private final List<OrcProto.Type> types;
- private final OrcFile.WriterVersion writerVersion;
-
-
- FileInfo(long modificationTime, long size, List<StripeInformation> stripeInfos,
- List<StripeStatistics> stripeStats, List<OrcProto.Type> types,
- List<OrcProto.ColumnStatistics> fileStats, FileMetaInfo fileMetaInfo,
- OrcFile.WriterVersion writerVersion, Long fileId) {
- this.modificationTime = modificationTime;
- this.size = size;
- this.fileId = fileId;
- this.stripeInfos = stripeInfos;
- this.fileMetaInfo = fileMetaInfo;
- this.stripeStats = stripeStats;
- this.types = types;
- this.fileStats = fileStats;
- this.writerVersion = writerVersion;
- }
- }
-
@SuppressWarnings("unchecked")
private org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
createVectorizedReader(InputSplit split, JobConf conf, Reporter reporter
@@ -1643,18 +1589,22 @@ private static void scheduleSplits(ETLSplitStrategy splitStrategy, Context conte
Reporter reporter) throws IOException {
boolean vectorMode = Utilities.getUseVectorizedInputFileFormat(conf);
boolean isAcidRead = isAcidRead(conf, inputSplit);
-
if (!isAcidRead) {
if (vectorMode) {
return createVectorizedReader(inputSplit, conf, reporter);
} else {
+ OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
+ if (inputSplit instanceof OrcSplit) {
+ OrcSplit split = (OrcSplit) inputSplit;
+ readerOptions.maxLength(split.getFileLength()).orcTail(split.getOrcTail());
+ }
return new OrcRecordReader(OrcFile.createReader(
((FileSplit) inputSplit).getPath(),
- OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit);
+ readerOptions),
+ conf, (FileSplit) inputSplit);
}
}
- OrcSplit split = (OrcSplit) inputSplit;
reporter.setStatus(inputSplit.toString());
Options options = new Options(conf).reporter(reporter);
@@ -1759,7 +1709,12 @@ public float getProgress() throws IOException {
if (split.hasBase()) {
bucket = AcidUtils.parseBaseBucketFilename(split.getPath(), conf)
.getBucket();
- reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+ OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf)
+ .maxLength(split.getFileLength());
+ if (split.hasFooter()) {
+ readerOptions.orcTail(split.getOrcTail());
+ }
+ reader = OrcFile.createReader(path, readerOptions);
} else {
bucket = (int) split.getStart();
reader = null;
@@ -1982,16 +1937,32 @@ private static boolean isStripeSatisfyPredicate(
* Represents footer cache.
*/
public interface FooterCache {
- static final ByteBuffer NO_SPLIT_AFTER_PPD = ByteBuffer.wrap(new byte[0]);
+ ByteBuffer NO_SPLIT_AFTER_PPD = ByteBuffer.wrap(new byte[0]);
void getAndValidate(List<HdfsFileStatusWithId> files, boolean isOriginal,
- FileInfo[] result, ByteBuffer[] ppdResult) throws IOException, HiveException;
+ OrcTail[] result, ByteBuffer[] ppdResult) throws IOException, HiveException;
boolean hasPpd();
boolean isBlocking();
- void put(Long fileId, FileStatus file, FileMetaInfo fileMetaInfo, Reader orcReader)
- throws IOException;
+ void put(FooterCacheKey cacheKey, OrcTail orcTail) throws IOException;
}
+ public static class FooterCacheKey {
+ Long fileId; // used by external cache
+ Path path; // used by local cache
+
+ FooterCacheKey(Long fileId, Path path) {
+ this.fileId = fileId;
+ this.path = path;
+ }
+
+ public Long getFileId() {
+ return fileId;
+ }
+
+ public Path getPath() {
+ return path;
+ }
+ }
/**
* Convert a Hive type property string that contains separated type names into a list of
* TypeDescription objects.
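
A small sketch of how the new FooterCacheKey is meant to be consumed, mirroring the put() call in populateAndCacheStripeDetails() above; it assumes same-package placement, since the key's constructor is package-private:

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.orc.impl.OrcTail;

    final class FooterCacheKeySketch {
      // LocalCache keys entries by Path and ExternalCache by file ID; the
      // key carries both, so one put() call can feed either implementation.
      static void cacheTail(OrcInputFormat.FooterCache cache, Long fileId,
          Path path, OrcTail tail) throws IOException {
        cache.put(new OrcInputFormat.FooterCacheKey(fileId, path), tail);
      }
    }
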
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
index 2e63aba..0c85827 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
@@ -20,27 +20,25 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
-import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.orc.FileMetaInfo;
+import org.apache.orc.OrcProto;
+import org.apache.orc.impl.OrcTail;
/**
* OrcFileSplit. Holds file meta info
*
*/
public class OrcNewSplit extends FileSplit {
- private FileMetaInfo fileMetaInfo;
+ private OrcTail orcTail;
private boolean hasFooter;
private boolean isOriginal;
private boolean hasBase;
private final List<AcidInputFormat.DeltaMetaData> deltas = new ArrayList<>();
- private OrcFile.WriterVersion writerVersion;
protected OrcNewSplit(){
//The FileSplit() constructor in hadoop 0.20 and 1.x is package private so can't use it.
@@ -52,7 +50,7 @@ protected OrcNewSplit(){
public OrcNewSplit(OrcSplit inner) throws IOException {
super(inner.getPath(), inner.getStart(), inner.getLength(),
inner.getLocations());
- this.fileMetaInfo = inner.getFileMetaInfo();
+ this.orcTail = inner.getOrcTail();
this.hasFooter = inner.hasFooter();
this.isOriginal = inner.isOriginal();
this.hasBase = inner.hasBase();
@@ -73,20 +71,11 @@ public void write(DataOutput out) throws IOException {
delta.write(out);
}
if (hasFooter) {
- // serialize FileMetaInfo fields
- Text.writeString(out, fileMetaInfo.compressionType);
- WritableUtils.writeVInt(out, fileMetaInfo.bufferSize);
- WritableUtils.writeVInt(out, fileMetaInfo.metadataSize);
-
- // serialize FileMetaInfo field footer
- ByteBuffer footerBuff = fileMetaInfo.footerBuffer;
- footerBuff.reset();
-
- // write length of buffer
- WritableUtils.writeVInt(out, footerBuff.limit() - footerBuff.position());
- out.write(footerBuff.array(), footerBuff.position(),
- footerBuff.limit() - footerBuff.position());
- WritableUtils.writeVInt(out, fileMetaInfo.writerVersion.getId());
+ OrcProto.FileTail fileTail = orcTail.getMinimalFileTail();
+ byte[] tailBuffer = fileTail.toByteArray();
+ int tailLen = tailBuffer.length;
+ WritableUtils.writeVInt(out, tailLen);
+ out.write(tailBuffer);
}
}
@@ -108,25 +97,16 @@ public void readFields(DataInput in) throws IOException {
deltas.add(dmd);
}
if (hasFooter) {
- // deserialize FileMetaInfo fields
- String compressionType = Text.readString(in);
- int bufferSize = WritableUtils.readVInt(in);
- int metadataSize = WritableUtils.readVInt(in);
-
- // deserialize FileMetaInfo field footer
- int footerBuffSize = WritableUtils.readVInt(in);
- ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize);
- in.readFully(footerBuff.array(), 0, footerBuffSize);
- OrcFile.WriterVersion writerVersion =
- ReaderImpl.getWriterVersion(WritableUtils.readVInt(in));
-
- fileMetaInfo = new FileMetaInfo(compressionType, bufferSize,
- metadataSize, footerBuff, writerVersion);
+ int tailLen = WritableUtils.readVInt(in);
+ byte[] tailBuffer = new byte[tailLen];
+ in.readFully(tailBuffer);
+ OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer);
+ orcTail = new OrcTail(fileTail, null);
}
}
- FileMetaInfo getFileMetaInfo(){
- return fileMetaInfo;
+ public OrcTail getOrcTail() {
+ return orcTail;
}
public boolean hasFooter() {
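
The tail wire format shared by OrcNewSplit and OrcSplit is simple enough to state as a round-trip sketch: a vint length prefix followed by the minimal FileTail protobuf, using only calls that appear in the hunks above:

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import org.apache.hadoop.io.WritableUtils;
    import org.apache.orc.OrcProto;
    import org.apache.orc.impl.OrcTail;

    final class TailWireFormatSketch {
      // Write side: length-prefix the minimal FileTail protobuf with a vint.
      static void write(DataOutput out, OrcTail tail) throws IOException {
        byte[] buf = tail.getMinimalFileTail().toByteArray();
        WritableUtils.writeVInt(out, buf.length);
        out.write(buf);
      }

      // Read side: parse the protobuf back; the serialized-tail argument is
      // null because only the parsed FileTail travels with the split.
      static OrcTail read(DataInput in) throws IOException {
        byte[] buf = new byte[WritableUtils.readVInt(in)];
        in.readFully(buf);
        return new OrcTail(OrcProto.FileTail.parseFrom(buf), null);
      }
    }
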
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
index 407fd62..969e70e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
@@ -18,24 +18,26 @@
package org.apache.hadoop.hive.ql.io.orc;
+import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
+import java.io.DataOutputStream;
import java.io.IOException;
-import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
-import org.apache.orc.FileMetaInfo;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.ColumnarSplit;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
+import org.apache.hadoop.hive.ql.io.ColumnarSplit;
import org.apache.hadoop.hive.ql.io.LlapAwareSplit;
import org.apache.hadoop.hive.ql.io.SyntheticFileId;
-import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.FileSplit;
-
+import org.apache.orc.OrcProto;
+import org.apache.orc.impl.OrcTail;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
@@ -43,13 +45,15 @@
*
*/
public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit {
- private FileMetaInfo fileMetaInfo;
+ private static final Logger LOG = LoggerFactory.getLogger(OrcSplit.class);
+ private OrcTail orcTail;
private boolean hasFooter;
private boolean isOriginal;
private boolean hasBase;
private final List<AcidInputFormat.DeltaMetaData> deltas = new ArrayList<>();
private long projColsUncompressedSize;
private transient Object fileKey;
+ private long fileLen;
static final int HAS_SYNTHETIC_FILEID_FLAG = 16;
static final int HAS_LONG_FILEID_FLAG = 8;
@@ -65,25 +69,40 @@ protected OrcSplit() {
}
public OrcSplit(Path path, Object fileId, long offset, long length, String[] hosts,
- FileMetaInfo fileMetaInfo, boolean isOriginal, boolean hasBase,
- List<AcidInputFormat.DeltaMetaData> deltas, long projectedDataSize) {
+ OrcTail orcTail, boolean isOriginal, boolean hasBase,
+ List<AcidInputFormat.DeltaMetaData> deltas, long projectedDataSize, long fileLen) {
super(path, offset, length, hosts);
// For HDFS, we could avoid serializing file ID and just replace the path with inode-based
// path. However, that breaks bunch of stuff because Hive later looks up things by split path.
this.fileKey = fileId;
- this.fileMetaInfo = fileMetaInfo;
- hasFooter = this.fileMetaInfo != null;
+ this.orcTail = orcTail;
+ hasFooter = this.orcTail != null;
this.isOriginal = isOriginal;
this.hasBase = hasBase;
this.deltas.addAll(deltas);
this.projColsUncompressedSize = projectedDataSize <= 0 ? length : projectedDataSize;
+ // setting file length to Long.MAX_VALUE will let the ORC reader get the file length from the file system
+ this.fileLen = fileLen <= 0 ? Long.MAX_VALUE : fileLen;
}
@Override
public void write(DataOutput out) throws IOException {
- //serialize path, offset, length using FileSplit
- super.write(out);
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(bos);
+ // serialize path, offset, length using FileSplit
+ super.write(dos);
+ int required = bos.size();
+
+ // write additional payload required for orc
+ writeAdditionalPayload(dos);
+ int additional = bos.size() - required;
+
+ out.write(bos.toByteArray());
+ LOG.info("Writing additional {} bytes to OrcSplit as payload. Required {} bytes.", additional,
+ required);
+ }
+ private void writeAdditionalPayload(final DataOutputStream out) throws IOException {
boolean isFileIdLong = fileKey instanceof Long, isFileIdWritable = fileKey instanceof Writable;
int flags = (hasBase ? BASE_FLAG : 0) |
(isOriginal ? ORIGINAL_FLAG : 0) |
@@ -96,26 +115,18 @@ public void write(DataOutput out) throws IOException {
delta.write(out);
}
if (hasFooter) {
- // serialize FileMetaInfo fields
- Text.writeString(out, fileMetaInfo.compressionType);
- WritableUtils.writeVInt(out, fileMetaInfo.bufferSize);
- WritableUtils.writeVInt(out, fileMetaInfo.metadataSize);
-
- // serialize FileMetaInfo field footer
- ByteBuffer footerBuff = fileMetaInfo.footerBuffer;
- footerBuff.reset();
-
- // write length of buffer
- WritableUtils.writeVInt(out, footerBuff.limit() - footerBuff.position());
- out.write(footerBuff.array(), footerBuff.position(),
- footerBuff.limit() - footerBuff.position());
- WritableUtils.writeVInt(out, fileMetaInfo.writerVersion.getId());
+ OrcProto.FileTail fileTail = orcTail.getMinimalFileTail();
+ byte[] tailBuffer = fileTail.toByteArray();
+ int tailLen = tailBuffer.length;
+ WritableUtils.writeVInt(out, tailLen);
+ out.write(tailBuffer);
}
if (isFileIdLong) {
out.writeLong(((Long)fileKey).longValue());
} else if (isFileIdWritable) {
((Writable)fileKey).write(out);
}
+ out.writeLong(fileLen);
}
@Override
@@ -141,20 +152,11 @@ public void readFields(DataInput in) throws IOException {
deltas.add(dmd);
}
if (hasFooter) {
- // deserialize FileMetaInfo fields
- String compressionType = Text.readString(in);
- int bufferSize = WritableUtils.readVInt(in);
- int metadataSize = WritableUtils.readVInt(in);
-
- // deserialize FileMetaInfo field footer
- int footerBuffSize = WritableUtils.readVInt(in);
- ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize);
- in.readFully(footerBuff.array(), 0, footerBuffSize);
- OrcFile.WriterVersion writerVersion =
- ReaderImpl.getWriterVersion(WritableUtils.readVInt(in));
-
- fileMetaInfo = new FileMetaInfo(compressionType, bufferSize,
- metadataSize, footerBuff, writerVersion);
+ int tailLen = WritableUtils.readVInt(in);
+ byte[] tailBuffer = new byte[tailLen];
+ in.readFully(tailBuffer);
+ OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer);
+ orcTail = new OrcTail(fileTail, null);
}
if (hasLongFileId) {
fileKey = in.readLong();
@@ -163,10 +165,11 @@ public void readFields(DataInput in) throws IOException {
fileId.readFields(in);
this.fileKey = fileId;
}
+ fileLen = in.readLong();
}
- FileMetaInfo getFileMetaInfo(){
- return fileMetaInfo;
+ public OrcTail getOrcTail() {
+ return orcTail;
}
public boolean hasFooter() {
@@ -185,6 +188,10 @@ public boolean hasBase() {
return deltas;
}
+ public long getFileLength() {
+ return fileLen;
+ }
+
/**
* If this method returns true, then for sure it is ACID.
* However, if it returns false.. it could be ACID or non-ACID.
@@ -215,7 +222,7 @@ public boolean canUseLlapIo() {
@Override
public String toString() {
return "OrcSplit [" + getPath() + ", start=" + getStart() + ", length=" + getLength()
- + ", isOriginal=" + isOriginal + ", hasBase=" + hasBase + ", deltas="
- + (deltas == null ? 0 : deltas.size()) + "]";
+ + ", isOriginal=" + isOriginal + ", fileLength=" + fileLen + ", hasFooter=" + hasFooter +
+ ", hasBase=" + hasBase + ", deltas=" + (deltas == null ? 0 : deltas.size()) + "]";
}
}
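
The buffered write() above exists so the split can log how many bytes the ORC payload adds on top of the plain FileSplit; the measuring pattern, distilled under those assumptions:

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutput;
    import java.io.DataOutputStream;
    import java.io.IOException;

    final class MeasuredWriteSketch {
      // Serialize both stages into one in-memory buffer, note the size after
      // each stage, then emit everything with a single write to the real sink.
      static void write(DataOutput out, byte[] base, byte[] payload) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        dos.write(base);
        int required = bos.size();                // what FileSplit alone needs
        dos.write(payload);
        int additional = bos.size() - required;   // ORC-specific extra bytes
        // OrcSplit.write() logs {additional, required} at this point.
        out.write(bos.toByteArray());
      }
    }
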
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 0b40fef..9bcdb39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -20,47 +20,22 @@
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
-import org.apache.orc.impl.BufferChunk;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.FileMetaInfo;
-import org.apache.orc.FileMetadata;
-import org.apache.orc.impl.InStream;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.StripeStatistics;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.orc.OrcProto;
-
-import com.google.common.collect.Lists;
-import com.google.protobuf.CodedInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class ReaderImpl extends org.apache.orc.impl.ReaderImpl
implements Reader {
private static final Logger LOG = LoggerFactory.getLogger(ReaderImpl.class);
- private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
-
private final ObjectInspector inspector;
- //serialized footer - Keeping this around for use by getFileMetaInfo()
- // will help avoid cpu cycles spend in deserializing at cost of increased
- // memory footprint.
- private ByteBuffer footerByteBuffer;
- // Same for metastore cache - maintains the same background buffer, but includes postscript.
- // This will only be set if the file footer/metadata was read from disk.
- private ByteBuffer footerMetaAndPsBuffer;
-
@Override
public ObjectInspector getObjectInspector() {
return inspector;
@@ -83,268 +58,14 @@ public ObjectInspector getObjectInspector() {
* @param options options for reading
* @throws IOException
*/
- public ReaderImpl(Path path,
- OrcFile.ReaderOptions options) throws IOException {
+ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
super(path, options);
- FileMetadata fileMetadata = options.getFileMetadata();
- if (fileMetadata != null) {
- this.inspector = OrcStruct.createObjectInspector(0, fileMetadata.getTypes());
- } else {
- FileMetaInfo footerMetaData;
- if (options.getFileMetaInfo() != null) {
- footerMetaData = options.getFileMetaInfo();
- } else {
- footerMetaData = extractMetaInfoFromFooter(fileSystem, path,
- options.getMaxLength());
- }
- this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
- MetaInfoObjExtractor rInfo =
- new MetaInfoObjExtractor(footerMetaData.compressionType,
- footerMetaData.bufferSize,
- footerMetaData.metadataSize,
- footerMetaData.footerBuffer
- );
- this.footerByteBuffer = footerMetaData.footerBuffer;
- this.inspector = rInfo.inspector;
- }
- }
-
- /** Extracts the necessary metadata from an externally store buffer (fullFooterBuffer). */
- public static FooterInfo extractMetaInfoFromFooter(
- ByteBuffer bb, Path srcPath) throws IOException {
- // Read the PostScript. Be very careful as some parts of this historically use bb position
- // and some use absolute offsets that have to take position into account.
- int baseOffset = bb.position();
- int lastByteAbsPos = baseOffset + bb.remaining() - 1;
- int psLen = bb.get(lastByteAbsPos) & 0xff;
- int psAbsPos = lastByteAbsPos - psLen;
- OrcProto.PostScript ps = extractPostScript(bb, srcPath, psLen, psAbsPos);
- assert baseOffset == bb.position();
-
- // Extract PS information.
- int footerSize = (int)ps.getFooterLength(), metadataSize = (int)ps.getMetadataLength(),
- footerAbsPos = psAbsPos - footerSize, metadataAbsPos = footerAbsPos - metadataSize;
- String compressionType = ps.getCompression().toString();
- CompressionCodec codec =
- WriterImpl.createCodec(org.apache.orc.CompressionKind.valueOf
- (compressionType));
- int bufferSize = (int)ps.getCompressionBlockSize();
- bb.position(metadataAbsPos);
- bb.mark();
-
- // Extract metadata and footer.
- OrcProto.Metadata metadata = extractMetadata(
- bb, metadataAbsPos, metadataSize, codec, bufferSize);
- List<StripeStatistics> stats = new ArrayList<>(metadata.getStripeStatsCount());
- for (OrcProto.StripeStatistics ss : metadata.getStripeStatsList()) {
- stats.add(new StripeStatistics(ss.getColStatsList()));
- }
- OrcProto.Footer footer = extractFooter(bb, footerAbsPos, footerSize, codec, bufferSize);
- bb.position(metadataAbsPos);
- bb.limit(psAbsPos);
- // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
- FileMetaInfo fmi = new FileMetaInfo(
- compressionType, bufferSize, metadataSize, bb, extractWriterVersion(ps));
- return new FooterInfo(stats, footer, fmi);
- }
-
- private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
- int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
- bb.position(footerAbsPos);
- bb.limit(footerAbsPos + footerSize);
- return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
- Lists.newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
- }
-
- private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
- int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
- bb.position(metadataAbsPos);
- bb.limit(metadataAbsPos + metadataSize);
- return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
- Lists.newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
- }
-
- private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
- int psLen, int psAbsOffset) throws IOException {
- // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
- assert bb.hasArray();
- CodedInputStream in = CodedInputStream.newInstance(
- bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
- OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
- checkOrcVersion(LOG, path, ps.getVersionList());
-
- // Check compression codec.
- switch (ps.getCompression()) {
- case NONE:
- break;
- case ZLIB:
- break;
- case SNAPPY:
- break;
- case LZO:
- break;
- default:
- throw new IllegalArgumentException("Unknown compression");
- }
- return ps;
- }
-
- private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
- Path path,
- long maxFileLength
- ) throws IOException {
- FSDataInputStream file = fs.open(path);
- ByteBuffer buffer = null, fullFooterBuffer = null;
- OrcProto.PostScript ps = null;
- OrcFile.WriterVersion writerVersion = null;
- try {
- // figure out the size of the file using the option or filesystem
- long size;
- if (maxFileLength == Long.MAX_VALUE) {
- size = fs.getFileStatus(path).getLen();
- } else {
- size = maxFileLength;
- }
-
- //read last bytes into buffer to get PostScript
- int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
- buffer = ByteBuffer.allocate(readSize);
- assert buffer.position() == 0;
- file.readFully((size - readSize),
- buffer.array(), buffer.arrayOffset(), readSize);
- buffer.position(0);
-
- //read the PostScript
- //get length of PostScript
- int psLen = buffer.get(readSize - 1) & 0xff;
- ensureOrcFooter(file, path, psLen, buffer);
- int psOffset = readSize - 1 - psLen;
- ps = extractPostScript(buffer, path, psLen, psOffset);
-
- int footerSize = (int) ps.getFooterLength();
- int metadataSize = (int) ps.getMetadataLength();
- writerVersion = extractWriterVersion(ps);
-
- //check if extra bytes need to be read
- int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
- if (extra > 0) {
- //more bytes need to be read, seek back to the right place and read extra bytes
- ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
- file.readFully((size - readSize - extra), extraBuf.array(),
- extraBuf.arrayOffset() + extraBuf.position(), extra);
- extraBuf.position(extra);
- //append with already read bytes
- extraBuf.put(buffer);
- buffer = extraBuf;
- buffer.position(0);
- fullFooterBuffer = buffer.slice();
- buffer.limit(footerSize + metadataSize);
- } else {
- //footer is already in the bytes in buffer, just adjust position, length
- buffer.position(psOffset - footerSize - metadataSize);
- fullFooterBuffer = buffer.slice();
- buffer.limit(psOffset);
- }
-
- // remember position for later TODO: what later? this comment is useless
- buffer.mark();
- } finally {
- try {
- file.close();
- } catch (IOException ex) {
- LOG.error("Failed to close the file after another error", ex);
- }
- }
-
- return new FileMetaInfo(
- ps.getCompression().toString(),
- (int) ps.getCompressionBlockSize(),
- (int) ps.getMetadataLength(),
- buffer,
- ps.getVersionList(),
- writerVersion,
- fullFooterBuffer
- );
- }
-
- /**
- * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
- * from serialized fields.
- * As the fields are final, the fields need to be initialized in the constructor and
- * can't be done in some helper function. So this helper class is used instead.
- *
- */
- private static class MetaInfoObjExtractor{
- final org.apache.orc.CompressionKind compressionKind;
- final CompressionCodec codec;
- final int bufferSize;
- final int metadataSize;
- final OrcProto.Metadata metadata;
- final OrcProto.Footer footer;
- final ObjectInspector inspector;
-
- MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer) throws IOException {
-
- this.compressionKind = org.apache.orc.CompressionKind.valueOf(codecStr.toUpperCase());
- this.bufferSize = bufferSize;
- this.codec = WriterImpl.createCodec(compressionKind);
- this.metadataSize = metadataSize;
-
- int position = footerBuffer.position();
- int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
-
- this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
- this.footer = extractFooter(
- footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);
-
- footerBuffer.position(position);
- this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
- }
- }
-
- public FileMetaInfo getFileMetaInfo() {
- return new FileMetaInfo(compressionKind.toString(), bufferSize,
- getMetadataSize(), footerByteBuffer, getVersionList(),
- getWriterVersion(), footerMetaAndPsBuffer);
- }
-
- /** Same as FileMetaInfo, but with extra fields. FileMetaInfo is serialized for splits
- * and so we don't just add fields to it, it's already messy and confusing. */
- public static final class FooterInfo {
- private final OrcProto.Footer footer;
- private final List<StripeStatistics> metadata;
- private final List<StripeInformation> stripes;
- private final FileMetaInfo fileMetaInfo;
-
- private FooterInfo(
- List<StripeStatistics> metadata, OrcProto.Footer footer, FileMetaInfo fileMetaInfo) {
- this.metadata = metadata;
- this.footer = footer;
- this.fileMetaInfo = fileMetaInfo;
- this.stripes = convertProtoStripesToStripes(footer.getStripesList());
- }
-
- public OrcProto.Footer getFooter() {
- return footer;
- }
-
- public List<StripeStatistics> getMetadata() {
- return metadata;
- }
-
- public FileMetaInfo getFileMetaInfo() {
- return fileMetaInfo;
- }
-
- public List<StripeInformation> getStripes() {
- return stripes;
- }
+ this.inspector = OrcStruct.createObjectInspector(0, types);
}
@Override
public ByteBuffer getSerializedFileFooter() {
- return footerMetaAndPsBuffer;
+ return tail.getSerializedTail();
}
@Override
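
One detail worth keeping from the removed code: an ORC tail parser bootstraps from the file's final byte, which holds the unsigned length of the PostScript (this is also how the org.apache.orc.impl.ReaderImpl that this class now delegates to must locate it). A standalone sketch over an in-memory buffer:

    import java.nio.ByteBuffer;

    final class PostScriptLocatorSketch {
      // Returns {absolute offset, length} of the PostScript within a buffer
      // whose remaining bytes end exactly at the end of the ORC file.
      static int[] locate(ByteBuffer bb) {
        int lastByte = bb.position() + bb.remaining() - 1;
        int psLen = bb.get(lastByte) & 0xff;  // unsigned length, final byte
        return new int[] { lastByte - psLen, psLen };
      }
    }
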
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index e4d2e6e..32ac34e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -182,8 +182,9 @@ public VectorizedOrcInputFormat() {
if(fSplit instanceof OrcSplit){
OrcSplit orcSplit = (OrcSplit) fSplit;
if (orcSplit.hasFooter()) {
- opts.fileMetaInfo(orcSplit.getFileMetaInfo());
+ opts.orcTail(orcSplit.getOrcTail());
}
+ opts.maxLength(orcSplit.getFileLength());
}
Reader reader = OrcFile.createReader(path, opts);
return new VectorizedOrcRecordReader(reader, conf, fSplit);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index edaecb3..c414801 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -649,7 +649,7 @@ public void testSplitGenReadOps() throws Exception {
}
// call-1: listLocatedStatus - mock:/mocktable
// call-2: open - mock:/mocktable/0_0
- // call-3: open - mock:/mocktable/0_0
+ // call-3: open - mock:/mocktable/0_1
assertEquals(3, readOpsDelta);
assertEquals(2, splits.length);
@@ -658,6 +658,704 @@ public void testSplitGenReadOps() throws Exception {
}
@Test
+ public void testSplitGenReadOpsLocalCache() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktbl");
+ conf.set("hive.orc.cache.stripe.details.size", "-1");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: listLocatedStatus - mock:/mocktbl
+ // call-2: open - mock:/mocktbl/0_0
+ // call-3: open - mock:/mocktbl/0_1
+ assertEquals(3, readOpsDelta);
+
+ // force BI to avoid reading footers
+ conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ orcInputFormat = new OrcInputFormat();
+ splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: listLocatedStatus - mock:/mocktbl
+ assertEquals(1, readOpsDelta);
+
+ // enable cache and use default strategy
+ conf.set("hive.orc.cache.stripe.details.size", "100");
+ conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "HYBRID");
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ orcInputFormat = new OrcInputFormat();
+ splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: listLocatedStatus - mock:/mocktbl
+ // call-2: open - mock:/mocktbl/0_0
+ // call-3: open - mock:/mocktbl/0_1
+ assertEquals(3, readOpsDelta);
+
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ orcInputFormat = new OrcInputFormat();
+ splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: listLocatedStatus - mock:/mocktbl
+ assertEquals(1, readOpsDelta);
+
+ // revert back to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
+ @Test
+ public void testNonVectorReaderNoFooterSerialize() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable1");
+ conf.set("hive.orc.splits.include.file.footer", "false");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=false"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=0"));
+ if (split instanceof OrcSplit) {
+ assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, conf, null);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read footer - split 1 => mock:/mocktable1/0_0
+ // call-2: open to read data - split 1 => mock:/mocktable1/0_0
+ // call-3: open to read footer - split 2 => mock:/mocktable1/0_1
+ // call-4: open to read data - split 2 => mock:/mocktable1/0_1
+ assertEquals(4, readOpsDelta);
+
+ // revert back to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
+ @Test
+ public void testNonVectorReaderFooterSerialize() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable2");
+ conf.set("hive.orc.splits.include.file.footer", "true");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for(int i=0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2*i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=true"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=0"));
+ if (split instanceof OrcSplit) {
+ assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, conf, null);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read data - split 1 => mock:/mocktable2/0_0
+ // call-2: open to read data - split 2 => mock:/mocktable2/0_1
+ assertEquals(2, readOpsDelta);
+
+ // revert to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
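+ // Same no-serialization check as above, but through the vectorized reader path set up by
+ // createMockExecutionEnvironment: each split is still expected to cost two opens, one for
+ // the footer and one for the data.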
+ @Test
+ public void testVectorReaderNoFooterSerialize() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable3");
+ conf.set("hive.orc.splits.include.file.footer", "false");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
+ "mocktable3", inspector, true, 0);
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=false"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=0"));
+ if (split instanceof OrcSplit) {
+ assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read footer - split 1 => mock:/mocktable3/0_0
+ // call-2: open to read data - split 1 => mock:/mocktable3/0_0
+ // call-3: open to read footer - split 2 => mock:/mocktable3/0_1
+ // call-4: open to read data - split 2 => mock:/mocktable3/0_1
+ assertEquals(4, readOpsDelta);
+
+ // revert to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
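+ // Vectorized reader with serialized footers: the footer travels with the split, so only
+ // the data read should hit the filesystem (one open per split).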
+ @Test
+ public void testVectorReaderFooterSerialize() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable4");
+ conf.set("hive.orc.splits.include.file.footer", "true");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
+ "mocktable4", inspector, true, 0);
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=true"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=0"));
+ if (split instanceof OrcSplit) {
+ assertTrue("Footer serialize test for vector reader, hasFooter is expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read data - split 1 => mock:/mocktable4/0_0
+ // call-2: open to read data - split 2 => mock:/mocktable4/0_1
+ assertEquals(2, readOpsDelta);
+
+ // revert to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
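+ // ACID read path (hive.transactional.table.scan=true) without footer serialization; the
+ // schema evolution columns/types are required by the ACID reader. Each split should still
+ // open its file twice (footer + data).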
+ @Test
+ public void testACIDReaderNoFooterSerialize() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable5");
+ conf.set("hive.transactional.table.scan", "true");
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
+ conf.set("hive.orc.splits.include.file.footer", "false");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=false"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=0"));
+ if (split instanceof OrcSplit) {
+ assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read footer - split 1 => mock:/mocktable5/0_0
+ // call-2: open to read data - split 1 => mock:/mocktable5/0_0
+ // call-3: open to read footer - split 2 => mock:/mocktable5/0_1
+ // call-4: open to read data - split 2 => mock:/mocktable5/0_1
+ assertEquals(4, readOpsDelta);
+
+ // revert to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
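+ // ACID read path with serialized footers: only the data read should hit the filesystem,
+ // one open per split.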
+ @Test
+ public void testACIDReaderFooterSerialize() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable6");
+ conf.set("hive.transactional.table.scan", "true");
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
+ conf.set("hive.orc.splits.include.file.footer", "true");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(2, splits.length);
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=true"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=0"));
+ if (split instanceof OrcSplit) {
+ assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read data - split 1 => mock:/mocktable6/0_0
+ // call-2: open to read data - split 2 => mock:/mocktable6/0_1
+ assertEquals(2, readOpsDelta);
+
+ // revert to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
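+ // Adds a delta_001_002 directory next to the base file, so a single split with deltas=1 is
+ // expected; the delta costs two extra filesystem calls (side file read and bucket file
+ // exists() check) on top of the base file's footer and data opens.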
+ @Test
+ public void testACIDReaderNoFooterSerializeWithDeltas() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable7");
+ conf.set("hive.transactional.table.scan", "true");
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
+ conf.set("hive.orc.splits.include.file.footer", "false");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/delta_001_002/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(1, splits.length);
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=false"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=1"));
+ if (split instanceof OrcSplit) {
+ assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read footer - split 1 => mock:/mocktable7/0_0
+ // call-2: open to read data - split 1 => mock:/mocktable7/0_0
+ // call-3: open side file (flush length) of delta directory
+ // call-4: fs.exists() check for delta_xxx_xxx/bucket_00000 file
+ assertEquals(4, readOpsDelta);
+
+ // revert to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
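+ // Same delta layout but with serialized footers: the base footer read is saved, leaving
+ // the data open plus the two delta-related calls.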
+ @Test
+ public void testACIDReaderFooterSerializeWithDeltas() throws Exception {
+ MockFileSystem fs = new MockFileSystem(conf);
+ MockPath mockPath = new MockPath(fs, "mock:///mocktable8");
+ conf.set("hive.transactional.table.scan", "true");
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
+ conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
+ conf.set("hive.orc.splits.include.file.footer", "true");
+ conf.set("mapred.input.dir", mockPath.toString());
+ conf.set("fs.defaultFS", "mock:///");
+ conf.set("fs.mock.impl", MockFileSystem.class.getName());
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer =
+ OrcFile.createWriter(new Path(mockPath + "/0_0"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ writer = OrcFile.createWriter(new Path(mockPath + "/delta_001_002/0_1"),
+ OrcFile.writerOptions(conf).blockPadding(false)
+ .bufferSize(1024).inspector(inspector));
+ for (int i = 0; i < 10; ++i) {
+ writer.addRow(new MyRow(i, 2 * i));
+ }
+ writer.close();
+
+ OrcInputFormat orcInputFormat = new OrcInputFormat();
+ InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
+ assertEquals(1, splits.length);
+ int readOpsBefore = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsBefore = statistics.getReadOps();
+ }
+ }
+ assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
+
+ for (InputSplit split : splits) {
+ assertTrue("OrcSplit is expected", split instanceof OrcSplit);
+ // ETL strategies will have start=3 (start of first stripe)
+ assertTrue(split.toString().contains("start=3"));
+ assertTrue(split.toString().contains("hasFooter=true"));
+ assertTrue(split.toString().contains("hasBase=true"));
+ assertTrue(split.toString().contains("deltas=1"));
+ if (split instanceof OrcSplit) {
+ assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" +
+ " orc splits.", ((OrcSplit) split).hasFooter());
+ }
+ orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
+ }
+
+ int readOpsDelta = -1;
+ for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
+ if (statistics.getScheme().equalsIgnoreCase("mock")) {
+ readOpsDelta = statistics.getReadOps() - readOpsBefore;
+ }
+ }
+ // call-1: open to read data - split 1 => mock:/mocktable8/0_0
+ // call-2: open side file (flush length) of delta directory
+ // call-3: fs.exists() check for delta_xxx_xxx/bucket_00000 file
+ assertEquals(3, readOpsDelta);
+
+ // revert to local fs
+ conf.set("fs.defaultFS", "file:///");
+ }
+
+ @Test
public void testBIStrategySplitBlockBoundary() throws Exception {
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);