diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java index 79279ea..f2079a6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java @@ -17,6 +17,14 @@ */ package org.apache.hadoop.hive.ql.io.orc; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -37,14 +45,6 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - /** * A tool for printing out the file structure of ORC files. */ @@ -127,9 +127,10 @@ private static void printMetaData(List files, Configuration conf, OrcProto.StripeFooter footer = rows.readStripeFooter(stripe); long sectionStart = stripeStart; for(OrcProto.Stream section: footer.getStreamsList()) { + String kind = section.hasKind() ? section.getKind().name() : "UNKNOWN"; System.out.println(" Stream: column " + section.getColumn() + - " section " + section.getKind() + " start: " + sectionStart + - " length " + section.getLength()); + " section " + kind + " start: " + sectionStart + + " length " + section.getLength()); sectionStart += section.getLength(); } for (int i = 0; i < footer.getColumnsCount(); ++i) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index e3afa60..bc00768 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -2852,7 +2852,7 @@ static boolean isDictionary(OrcProto.Stream.Kind kind, // figure out which columns have a present stream boolean[] hasNull = new boolean[types.size()]; for(OrcProto.Stream stream: streamList) { - if (stream.getKind() == OrcProto.Stream.Kind.PRESENT) { + if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.PRESENT)) { hasNull[stream.getColumn()] = true; } } @@ -2860,7 +2860,9 @@ static boolean isDictionary(OrcProto.Stream.Kind kind, long length = stream.getLength(); int column = stream.getColumn(); OrcProto.Stream.Kind streamKind = stream.getKind(); - if (StreamName.getArea(streamKind) == StreamName.Area.DATA && + // since stream kind is optional, first check if it exists + if (stream.hasKind() && + (StreamName.getArea(streamKind) == StreamName.Area.DATA) && includedColumns[column]) { // if we aren't filtering or it is a dictionary, load it. if (includedRowGroups == null || @@ -2997,8 +2999,10 @@ static void createStreams(List streamDescriptions, long offset = 0; for(OrcProto.Stream streamDesc: streamDescriptions) { int column = streamDesc.getColumn(); + // do not create stream if stream kind does not exist if ((includeColumn == null || includeColumn[column]) && - StreamName.getArea(streamDesc.getKind()) == StreamName.Area.DATA) { + streamDesc.hasKind() && + (StreamName.getArea(streamDesc.getKind()) == StreamName.Area.DATA)) { long length = streamDesc.getLength(); int first = -1; int last = -2; @@ -3234,7 +3238,7 @@ private int findStripe(long rowNumber) { indexes = new OrcProto.RowIndex[this.indexes.length]; } for(OrcProto.Stream stream: stripeFooter.getStreamsList()) { - if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) { + if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) { int col = stream.getColumn(); if ((included == null || included[col]) && indexes[col] == null) { byte[] buffer = new byte[(int) stream.getLength()];