diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java index 845e2e6..58bfbc3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java @@ -17,11 +17,9 @@ */ package org.apache.hadoop.hive.ql.io.orc; -import java.util.ArrayList; -import java.util.List; - import java.io.IOException; import java.text.DecimalFormat; +import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -29,8 +27,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex; import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry; -import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; /** * A tool for printing out the file structure of ORC files. @@ -98,9 +94,10 @@ public static void main(String[] args) throws Exception { OrcProto.StripeFooter footer = rows.readStripeFooter(stripe); long sectionStart = stripeStart; for(OrcProto.Stream section: footer.getStreamsList()) { + String kind = section.hasKind() ? section.getKind().name() : "UNKNOWN"; System.out.println(" Stream: column " + section.getColumn() + - " section " + section.getKind() + " start: " + sectionStart + - " length " + section.getLength()); + " section " + kind + " start: " + sectionStart + + " length " + section.getLength()); sectionStart += section.getLength(); } for (int i = 0; i < footer.getColumnsCount(); ++i) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index a6a0ec1..24c2474 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -2835,7 +2835,7 @@ static boolean isDictionary(OrcProto.Stream.Kind kind, // figure out which columns have a present stream boolean[] hasNull = new boolean[types.size()]; for(OrcProto.Stream stream: streamList) { - if (stream.getKind() == OrcProto.Stream.Kind.PRESENT) { + if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.PRESENT)) { hasNull[stream.getColumn()] = true; } } @@ -2843,7 +2843,9 @@ static boolean isDictionary(OrcProto.Stream.Kind kind, long length = stream.getLength(); int column = stream.getColumn(); OrcProto.Stream.Kind streamKind = stream.getKind(); - if (StreamName.getArea(streamKind) == StreamName.Area.DATA && + // since stream kind is optional, first check if it exists + if (stream.hasKind() && + (StreamName.getArea(streamKind) == StreamName.Area.DATA) && includedColumns[column]) { // if we aren't filtering or it is a dictionary, load it. if (includedRowGroups == null || @@ -2980,8 +2982,10 @@ static void createStreams(List streamDescriptions, long offset = 0; for(OrcProto.Stream streamDesc: streamDescriptions) { int column = streamDesc.getColumn(); + // do not create stream if stream kind does not exist if ((includeColumn == null || includeColumn[column]) && - StreamName.getArea(streamDesc.getKind()) == StreamName.Area.DATA) { + streamDesc.hasKind() && + (StreamName.getArea(streamDesc.getKind()) == StreamName.Area.DATA)) { long length = streamDesc.getLength(); int first = -1; int last = -2; @@ -3217,7 +3221,7 @@ private int findStripe(long rowNumber) { indexes = new OrcProto.RowIndex[this.indexes.length]; } for(OrcProto.Stream stream: stripeFooter.getStreamsList()) { - if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) { + if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) { int col = stream.getColumn(); if ((included == null || included[col]) && indexes[col] == null) { byte[] buffer = new byte[(int) stream.getLength()];