diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a182cd7..237b669 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -533,6 +533,7 @@
HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false),
HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000),
HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10),
+ HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false),
HIVESKEWJOIN("hive.optimize.skewjoin", false),
HIVECONVERTJOIN("hive.auto.convert.join", true),
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index 0d08aa2..f7f50e3 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -1933,6 +1933,14 @@
+<property>
+  <name>hive.exec.orc.skip.corrupt.data</name>
+  <value>false</value>
+  <description>If the ORC reader encounters corrupt data, this value will be used to determine
+  whether to skip the corrupt data or throw an exception. The default behavior is to throw an exception.
+  </description>
+</property>
+
 <property>
   <name>hive.multi.insert.move.tasks.share.dependencies</name>
   <value>false</value>
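
Note on usage: the property defaults to false, so reads keep failing fast on corrupt data unless a user opts in (for example with "SET hive.exec.orc.skip.corrupt.data=true;" in a Hive session). Below is a minimal, hypothetical sketch of setting the flag programmatically on a Hadoop Configuration; only the property name comes from this patch.

    import org.apache.hadoop.conf.Configuration;

    public class EnableSkipCorruptData {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Opt in to skipping corrupt ORC data instead of failing the read.
        conf.setBoolean("hive.exec.orc.skip.corrupt.data", true);
        // The ORC reader later consults this flag (see RunLengthIntegerReaderV2 below).
        System.out.println(conf.getBoolean("hive.exec.orc.skip.corrupt.data", false)); // prints true
      }
    }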
diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index f8be581..208e9f4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -25,6 +25,7 @@
import java.util.regex.Pattern;
import org.antlr.runtime.tree.Tree;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ASTNodeOrigin;
@@ -431,7 +432,9 @@
STATSAGGREGATOR_MISSED_SOMESTATS(30016,
"Stats type {0} is missing from stats aggregator. If you don't want the query " +
"to fail because of this, set hive.stats.atomic=false", true),
- STATS_SKIPPING_BY_ERROR(30017, "Skipping stats aggregation by error {0}", true);
+ STATS_SKIPPING_BY_ERROR(30017, "Skipping stats aggregation by error {0}", true),
+ ORC_CORRUPTED_READ(30018, "Corruption in ORC data encountered. To skip reading corrupted "
+ + "data, set " + HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA + " to true");
;
private int errorCode;
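
Because HiveConf.ConfVars overrides toString() to return the property name, concatenating HIVE_ORC_SKIP_CORRUPT_DATA into the message should render "hive.exec.orc.skip.corrupt.data" rather than the enum constant name (stated here as my reading of HiveConf, not something this patch changes). A tiny hypothetical snippet showing how the new error surfaces at read time, mirroring the throw added in RunLengthIntegerReaderV2 further down:

    import java.io.IOException;
    import org.apache.hadoop.hive.ql.ErrorMsg;

    public class CorruptReadErrorDemo {
      public static void main(String[] args) {
        // This is the exception the reader throws when it detects corruption and
        // hive.exec.orc.skip.corrupt.data is left at its default of false.
        IOException e = new IOException(ErrorMsg.ORC_CORRUPTED_READ.getMsg());
        System.out.println(e.getMessage());
      }
    }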
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index 5d5b760..0143b53 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -33,7 +33,7 @@ public static void main(String[] args) throws Exception {
for(String filename: args) {
System.out.println("Structure for " + filename);
Path path = new Path(filename);
- Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
+ Reader reader = OrcFile.createReader(path.getFileSystem(conf), path, conf);
RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
System.out.println("Rows: " + reader.getNumberOfRows());
System.out.println("Compression: " + reader.getCompression());
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index 00d38f2..a56fe2f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -114,14 +114,15 @@ private OrcFile() {}
* @return a new ORC file reader.
* @throws IOException
*/
- public static Reader createReader(FileSystem fs, Path path
- ) throws IOException {
- return new ReaderImpl(fs, path);
+ public static Reader createReader(FileSystem fs, Path path,
+ Configuration conf) throws IOException {
+ return new ReaderImpl(fs, path, conf);
}
- public static Reader createReader(FileSystem fs, Path path, FileMetaInfo fileMetaInfo)
+ public static Reader createReader(FileSystem fs, Path path,
+ FileMetaInfo fileMetaInfo, Configuration conf)
throws IOException {
- return new ReaderImpl(fs, path, fileMetaInfo);
+ return new ReaderImpl(fs, path, fileMetaInfo, conf);
}
/**
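
Both createReader overloads now take a Configuration, which ReaderImpl stores and hands down to RecordReaderImpl, so a flag set on the job configuration reaches the column readers. A sketch of the updated call pattern, assuming a hypothetical local ORC file; it simply mirrors the three-argument signature introduced above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;
    import org.apache.hadoop.hive.ql.io.orc.RecordReader;

    public class ReadOrcWithConf {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setBoolean("hive.exec.orc.skip.corrupt.data", true);
        Path path = new Path("/tmp/example.orc");     // hypothetical file
        FileSystem fs = path.getFileSystem(conf);
        // New signature: the Configuration now rides along with the reader.
        Reader reader = OrcFile.createReader(fs, path, conf);
        RecordReader rows = reader.rows(null);
        while (rows.hasNext()) {
          System.out.println(rows.next(null));
        }
      }
    }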
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 42a3b7e..180be2f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -270,16 +270,16 @@ public static SearchArgument createSarg(List<OrcProto.Type> types, Configuration
if(!(fSplit instanceof OrcSplit)){
//If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
} else {
//We have OrcSplit, which may have footer metadata cached, so use the appropriate reader
//constructor
OrcSplit orcSplit = (OrcSplit) fSplit;
if (orcSplit.hasFooter()) {
FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo();
- reader = OrcFile.createReader(fs, path, fMetaInfo);
+ reader = OrcFile.createReader(fs, path, fMetaInfo, conf);
} else {
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
}
}
return new OrcRecordReader(reader, conf, fSplit.getStart(), fSplit.getLength());
@@ -299,7 +299,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf,
}
for (FileStatus file : files) {
try {
- OrcFile.createReader(fs, file.getPath());
+ OrcFile.createReader(fs, file.getPath(), conf);
} catch (IOException e) {
return false;
}
@@ -797,14 +797,14 @@ private void populateAndCacheStripeDetails() {
types = fileInfo.types;
// For multiple runs, in case sendSplitsInFooter changes
if (fileMetaInfo == null && context.footerInSplits) {
- orcReader = OrcFile.createReader(fs, file.getPath());
+ orcReader = OrcFile.createReader(fs, file.getPath(), context.conf);
fileInfo.fileMetaInfo = orcReader.getFileMetaInfo();
fileInfo.metadata = orcReader.getMetadata();
fileInfo.types = orcReader.getTypes();
}
}
if (!found) {
- orcReader = OrcFile.createReader(fs, file.getPath());
+ orcReader = OrcFile.createReader(fs, file.getPath(), context.conf);
stripes = orcReader.getStripes();
metadata = orcReader.getMetadata();
types = orcReader.getTypes();
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
index fdd93c6..ec477e2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
@@ -46,9 +46,10 @@
throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit) inputSplit;
Path path = fileSplit.getPath();
- FileSystem fs = path.getFileSystem(ShimLoader.getHadoopShims()
- .getConfiguration(context));
- return new OrcRecordReader(OrcFile.createReader(fs, path),
+ Configuration conf = ShimLoader.getHadoopShims()
+ .getConfiguration(context);
+ FileSystem fs = path.getFileSystem(conf);
+ return new OrcRecordReader(OrcFile.createReader(fs, path, conf),
ShimLoader.getHadoopShims().getConfiguration(context),
fileSplit.getStart(), fileSplit.getLength());
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 56f25b7..a34a6ce 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -30,6 +30,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -60,6 +61,7 @@
private final OrcProto.Footer footer;
private final ObjectInspector inspector;
private long deserializedSize = -1;
+ private final Configuration conf;
//serialized footer - Keeping this around for use by getFileMetaInfo()
// will help avoid cpu cycles spend in deserializing at cost of increased
@@ -288,11 +290,13 @@ static void checkOrcVersion(Log log, Path path, List<Integer> version) {
* Constructor that extracts metadata information from file footer
* @param fs
* @param path
+ * @param conf
* @throws IOException
*/
- ReaderImpl(FileSystem fs, Path path) throws IOException {
+ ReaderImpl(FileSystem fs, Path path, Configuration conf) throws IOException {
this.fileSystem = fs;
this.path = path;
+ this.conf = conf;
FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fs, path);
@@ -316,12 +320,14 @@ static void checkOrcVersion(Log log, Path path, List<Integer> version) {
* @param fs
* @param path
* @param fMetaInfo
+ * @param conf
* @throws IOException
*/
- ReaderImpl(FileSystem fs, Path path, FileMetaInfo fMetaInfo)
+ ReaderImpl(FileSystem fs, Path path, FileMetaInfo fMetaInfo, Configuration conf)
throws IOException {
this.fileSystem = fs;
this.path = path;
+ this.conf = conf;
MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(
fMetaInfo.compressionType,
@@ -487,7 +493,7 @@ public RecordReader rows(long offset, long length, boolean[] include,
return new RecordReaderImpl(this.getStripes(), fileSystem, path, offset,
length, footer.getTypesList(), codec, bufferSize,
- include, footer.getRowIndexStride(), sarg, columnNames);
+ include, footer.getRowIndexStride(), sarg, columnNames, conf);
}
@Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index c3c9685..7798a7c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -31,6 +31,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -84,6 +85,7 @@
private final int[] filterColumns;
// an array about which row groups aren't skipped
private boolean[] includedRowGroups = null;
+ private final Configuration conf;
  RecordReaderImpl(Iterable<StripeInformation> stripes,
FileSystem fileSystem,
@@ -95,13 +97,15 @@
boolean[] included,
long strideRate,
SearchArgument sarg,
- String[] columnNames
+ String[] columnNames,
+ Configuration conf
) throws IOException {
this.file = fileSystem.open(path);
this.codec = codec;
this.types = types;
this.bufferSize = bufferSize;
this.included = included;
+ this.conf = conf;
this.sarg = sarg;
if (sarg != null) {
sargLeaves = sarg.getLeaves();
@@ -128,7 +132,7 @@
firstRow = skippedRows;
totalRowCount = rows;
- reader = createTreeReader(path, 0, types, included);
+ reader = createTreeReader(path, 0, types, included, conf);
indexes = new OrcProto.RowIndex[types.size()];
rowIndexStride = strideRate;
advanceToNextRow(0L);
@@ -163,10 +167,12 @@ public long getNext() {
protected final int columnId;
private BitFieldReader present = null;
protected boolean valuePresent = false;
+ protected final Configuration conf;
- TreeReader(Path path, int columnId) {
+ TreeReader(Path path, int columnId, Configuration conf) {
this.path = path;
this.columnId = columnId;
+ this.conf = conf;
}
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
@@ -182,7 +188,7 @@ IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
switch (kind) {
case DIRECT_V2:
case DICTIONARY_V2:
- return new RunLengthIntegerReaderV2(in, signed);
+ return new RunLengthIntegerReaderV2(in, signed, conf);
case DIRECT:
case DICTIONARY:
return new RunLengthIntegerReader(in, signed);
@@ -275,8 +281,8 @@ Object nextVector(Object previousVector, long batchSize) throws IOException {
private static class BooleanTreeReader extends TreeReader{
private BitFieldReader reader = null;
- BooleanTreeReader(Path path, int columnId) {
- super(path, columnId);
+ BooleanTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -335,8 +341,8 @@ Object nextVector(Object previousVector, long batchSize) throws IOException {
private static class ByteTreeReader extends TreeReader{
private RunLengthByteReader reader = null;
- ByteTreeReader(Path path, int columnId) {
- super(path, columnId);
+ ByteTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -395,8 +401,8 @@ void skipRows(long items) throws IOException {
private static class ShortTreeReader extends TreeReader{
private IntegerReader reader = null;
- ShortTreeReader(Path path, int columnId) {
- super(path, columnId);
+ ShortTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -465,8 +471,8 @@ void skipRows(long items) throws IOException {
private static class IntTreeReader extends TreeReader{
private IntegerReader reader = null;
- IntTreeReader(Path path, int columnId) {
- super(path, columnId);
+ IntTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -535,8 +541,8 @@ void skipRows(long items) throws IOException {
private static class LongTreeReader extends TreeReader{
private IntegerReader reader = null;
- LongTreeReader(Path path, int columnId) {
- super(path, columnId);
+ LongTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -605,8 +611,8 @@ void skipRows(long items) throws IOException {
private static class FloatTreeReader extends TreeReader{
private InStream stream;
- FloatTreeReader(Path path, int columnId) {
- super(path, columnId);
+ FloatTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -685,8 +691,8 @@ void skipRows(long items) throws IOException {
private static class DoubleTreeReader extends TreeReader{
private InStream stream;
- DoubleTreeReader(Path path, int columnId) {
- super(path, columnId);
+ DoubleTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -764,8 +770,8 @@ void skipRows(long items) throws IOException {
private InStream stream;
private IntegerReader lengths = null;
- BinaryTreeReader(Path path, int columnId) {
- super(path, columnId);
+ BinaryTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -843,8 +849,8 @@ void skipRows(long items) throws IOException {
private IntegerReader nanos = null;
private final LongColumnVector nanoVector = new LongColumnVector();
- TimestampTreeReader(Path path, int columnId) {
- super(path, columnId);
+ TimestampTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -970,8 +976,8 @@ void skipRows(long items) throws IOException {
private static class DateTreeReader extends TreeReader{
private IntegerReader reader = null;
- DateTreeReader(Path path, int columnId) {
- super(path, columnId);
+ DateTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -1045,8 +1051,8 @@ void skipRows(long items) throws IOException {
private final int precision;
private final int scale;
- DecimalTreeReader(Path path, int columnId, int precision, int scale) {
- super(path, columnId);
+ DecimalTreeReader(Path path, int columnId, int precision, int scale, Configuration conf) {
+ super(path, columnId, conf);
this.precision = precision;
this.scale = scale;
}
@@ -1155,8 +1161,8 @@ void skipRows(long items) throws IOException {
private static class StringTreeReader extends TreeReader {
private TreeReader reader;
- StringTreeReader(Path path, int columnId) {
- super(path, columnId);
+ StringTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -1173,11 +1179,11 @@ void startStripe(Map<StreamName, InStream> streams,
switch (encodings.get(columnId).getKind()) {
case DIRECT:
case DIRECT_V2:
- reader = new StringDirectTreeReader(path, columnId);
+ reader = new StringDirectTreeReader(path, columnId, conf);
break;
case DICTIONARY:
case DICTIONARY_V2:
- reader = new StringDictionaryTreeReader(path, columnId);
+ reader = new StringDictionaryTreeReader(path, columnId, conf);
break;
default:
throw new IllegalArgumentException("Unsupported encoding " +
@@ -1217,8 +1223,8 @@ void skipRows(long items) throws IOException {
private final LongColumnVector scratchlcv;
- StringDirectTreeReader(Path path, int columnId) {
- super(path, columnId);
+ StringDirectTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
scratchlcv = new LongColumnVector();
}
@@ -1366,8 +1372,8 @@ void skipRows(long items) throws IOException {
private byte[] dictionaryBufferInBytesCache = null;
private final LongColumnVector scratchlcv;
- StringDictionaryTreeReader(Path path, int columnId) {
- super(path, columnId);
+ StringDictionaryTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
scratchlcv = new LongColumnVector();
}
@@ -1532,8 +1538,8 @@ void skipRows(long items) throws IOException {
private static class CharTreeReader extends StringTreeReader {
int maxLength;
- CharTreeReader(Path path, int columnId, int maxLength) {
- super(path, columnId);
+ CharTreeReader(Path path, int columnId, int maxLength, Configuration conf) {
+ super(path, columnId, conf);
this.maxLength = maxLength;
}
@@ -1560,8 +1566,8 @@ Object next(Object previous) throws IOException {
private static class VarcharTreeReader extends StringTreeReader {
int maxLength;
- VarcharTreeReader(Path path, int columnId, int maxLength) {
- super(path, columnId);
+ VarcharTreeReader(Path path, int columnId, int maxLength, Configuration conf) {
+ super(path, columnId, conf);
this.maxLength = maxLength;
}
@@ -1591,8 +1597,8 @@ Object next(Object previous) throws IOException {
StructTreeReader(Path path, int columnId,
                     List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getFieldNamesCount();
this.fields = new TreeReader[fieldCount];
@@ -1600,7 +1606,7 @@ Object next(Object previous) throws IOException {
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(path, subtype, types, included);
+ this.fields[i] = createTreeReader(path, subtype, types, included, conf);
}
this.fieldNames[i] = type.getFieldNames(i);
}
@@ -1693,15 +1699,15 @@ void skipRows(long items) throws IOException {
UnionTreeReader(Path path, int columnId,
                    List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getSubtypesCount();
this.fields = new TreeReader[fieldCount];
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(path, subtype, types, included);
+ this.fields[i] = createTreeReader(path, subtype, types, included, conf);
}
}
}
@@ -1772,11 +1778,11 @@ void skipRows(long items) throws IOException {
ListTreeReader(Path path, int columnId,
                   List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
elementReader = createTreeReader(path, type.getSubtypes(0), types,
- included);
+ included, conf);
}
@Override
@@ -1863,18 +1869,18 @@ void skipRows(long items) throws IOException {
MapTreeReader(Path path,
int columnId,
                  List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int keyColumn = type.getSubtypes(0);
int valueColumn = type.getSubtypes(1);
if (included == null || included[keyColumn]) {
- keyReader = createTreeReader(path, keyColumn, types, included);
+ keyReader = createTreeReader(path, keyColumn, types, included, conf);
} else {
keyReader = null;
}
if (included == null || included[valueColumn]) {
- valueReader = createTreeReader(path, valueColumn, types, included);
+ valueReader = createTreeReader(path, valueColumn, types, included, conf);
} else {
valueReader = null;
}
@@ -1956,54 +1962,55 @@ void skipRows(long items) throws IOException {
private static TreeReader createTreeReader(Path path,
int columnId,
                                             List<OrcProto.Type> types,
- boolean[] included
+ boolean[] included,
+ Configuration conf
) throws IOException {
OrcProto.Type type = types.get(columnId);
switch (type.getKind()) {
case BOOLEAN:
- return new BooleanTreeReader(path, columnId);
+ return new BooleanTreeReader(path, columnId, conf);
case BYTE:
- return new ByteTreeReader(path, columnId);
+ return new ByteTreeReader(path, columnId, conf);
case DOUBLE:
- return new DoubleTreeReader(path, columnId);
+ return new DoubleTreeReader(path, columnId, conf);
case FLOAT:
- return new FloatTreeReader(path, columnId);
+ return new FloatTreeReader(path, columnId, conf);
case SHORT:
- return new ShortTreeReader(path, columnId);
+ return new ShortTreeReader(path, columnId, conf);
case INT:
- return new IntTreeReader(path, columnId);
+ return new IntTreeReader(path, columnId, conf);
case LONG:
- return new LongTreeReader(path, columnId);
+ return new LongTreeReader(path, columnId, conf);
case STRING:
- return new StringTreeReader(path, columnId);
+ return new StringTreeReader(path, columnId, conf);
case CHAR:
if (!type.hasMaximumLength()) {
throw new IllegalArgumentException("ORC char type has no length specified");
}
- return new CharTreeReader(path, columnId, type.getMaximumLength());
+ return new CharTreeReader(path, columnId, type.getMaximumLength(), conf);
case VARCHAR:
if (!type.hasMaximumLength()) {
throw new IllegalArgumentException("ORC varchar type has no length specified");
}
- return new VarcharTreeReader(path, columnId, type.getMaximumLength());
+ return new VarcharTreeReader(path, columnId, type.getMaximumLength(), conf);
case BINARY:
- return new BinaryTreeReader(path, columnId);
+ return new BinaryTreeReader(path, columnId, conf);
case TIMESTAMP:
- return new TimestampTreeReader(path, columnId);
+ return new TimestampTreeReader(path, columnId, conf);
case DATE:
- return new DateTreeReader(path, columnId);
+ return new DateTreeReader(path, columnId, conf);
case DECIMAL:
int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
int scale = type.hasScale()? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
- return new DecimalTreeReader(path, columnId, precision, scale);
+ return new DecimalTreeReader(path, columnId, precision, scale, conf);
case STRUCT:
- return new StructTreeReader(path, columnId, types, included);
+ return new StructTreeReader(path, columnId, types, included, conf);
case LIST:
- return new ListTreeReader(path, columnId, types, included);
+ return new ListTreeReader(path, columnId, types, included, conf);
case MAP:
- return new MapTreeReader(path, columnId, types, included);
+ return new MapTreeReader(path, columnId, types, included, conf);
case UNION:
- return new UnionTreeReader(path, columnId, types, included);
+ return new UnionTreeReader(path, columnId, types, included, conf);
default:
throw new IllegalArgumentException("Unsupported type " +
type.getKind());
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
index 5305e00..a7e66a8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
@@ -20,6 +20,10 @@
import java.io.EOFException;
import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2.EncodingType;
@@ -34,10 +38,13 @@
private final long[] literals = new long[RunLengthIntegerWriterV2.MAX_SCOPE];
private int numLiterals = 0;
private int used = 0;
+ private final boolean skipCorrupt;
- RunLengthIntegerReaderV2(InStream input, boolean signed) throws IOException {
+ RunLengthIntegerReaderV2(InStream input, boolean signed,
+ Configuration conf) throws IOException {
this.input = input;
this.signed = signed;
+ this.skipCorrupt = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA);
}
private void readValues() throws IOException {
@@ -163,14 +170,20 @@ private void readPatchedBaseValues(int firstByte) throws IOException {
// unpack the patch blob
long[] unpackedPatch = new long[pl];
- SerializationUtils.readInts(unpackedPatch, 0, pl, pw + pgw, input);
+
+ if ((pw + pgw) > 64 && !skipCorrupt) {
+ throw new IOException(ErrorMsg.ORC_CORRUPTED_READ.getMsg());
+ }
+ int bitSize = SerializationUtils.getClosestFixedBits(pw + pgw);
+ SerializationUtils.readInts(unpackedPatch, 0, pl, bitSize, input);
// apply the patch directly when decoding the packed data
int patchIdx = 0;
long currGap = 0;
long currPatch = 0;
+ long patchMask = ((1L << pw) - 1);
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
long actualGap = 0;
// special case: gap is >255 then patch value will be 0.
@@ -179,7 +192,7 @@ private void readPatchedBaseValues(int firstByte) throws IOException {
actualGap += 255;
patchIdx++;
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
}
// add the left over gap
actualGap += currGap;
@@ -199,7 +212,7 @@ private void readPatchedBaseValues(int firstByte) throws IOException {
if (patchIdx < pl) {
// read the next gap and patch
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
actualGap = 0;
// special case: gap is >255 then patch will be 0. if gap is
@@ -208,7 +221,7 @@ private void readPatchedBaseValues(int firstByte) throws IOException {
actualGap += 255;
patchIdx++;
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
}
// add the left over gap
actualGap += currGap;
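
For context: each entry of the PATCHED_BASE patch blob packs a gap in its high bits and a patch value in its low pw bits, which is why the loop above shifts by pw and masks with (1L << pw) - 1. The reader change rounds the combined width pw + pgw up to the closest supported fixed bit size before readInts, and treats a combined width over 64 (which cannot fit in one long) as corruption unless skipping is enabled. A self-contained sketch of that packing arithmetic, with made-up widths:

    public class GapPatchPackingSketch {
      // Pack a gap above a pw-bit patch value in a single long -- the layout
      // readPatchedBaseValues assumes when it shifts by pw and masks.
      static long pack(long gap, long patch, int pw) {
        return (gap << pw) | (patch & ((1L << pw) - 1));
      }

      public static void main(String[] args) {
        int pw = 56, pgw = 8;                       // pw + pgw == 64: still representable
        long entry = pack(200L, 0x00FFFFFFFFFFFFFFL, pw);
        long gap = entry >>> pw;                    // 200
        long patch = entry & ((1L << pw) - 1);      // 0x00FFFFFFFFFFFFFF
        System.out.println(gap + " 0x" + Long.toHexString(patch));
        // If pw + pgw exceeded 64 there would be no valid layout at all, which is
        // exactly the case the reader now reports as ORC_CORRUPTED_READ.
      }
    }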
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
index 3b684d7..171ff89 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
@@ -567,6 +567,7 @@ private void preparePatchedBlob() {
// since we are considering only 95 percentile, the size of gap and
// patch array can contain only be 5% values
patchLength = (int) Math.ceil((baseRedLiterals.length * 0.05));
+
int[] gapList = new int[patchLength];
long[] patchList = new long[patchLength];
@@ -574,6 +575,15 @@ private void preparePatchedBlob() {
patchWidth = brBits100p - brBits95p;
patchWidth = SerializationUtils.getClosestFixedBits(patchWidth);
+      // if the patch bit requirement is 64 then it will not be possible to pack
+      // gap and patch together in a long. To make sure gap and patch can be
+      // packed together, adjust the patch width.
+ if (patchWidth == 64) {
+ patchWidth = 56;
+ brBits95p = 8;
+ mask = (1L << brBits95p) - 1;
+ }
+
int gapIdx = 0;
int patchIdx = 0;
int prev = 0;
@@ -642,7 +652,7 @@ private void preparePatchedBlob() {
long g = gapList[gapIdx++];
long p = patchList[patchIdx++];
while (g > 255) {
- gapVsPatchList[i++] = (255 << patchWidth) | 0;
+ gapVsPatchList[i++] = (255L << patchWidth);
g -= 255;
}
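
Two writer-side fixes back this up. First, when patchWidth works out to 64 bits the base-reduced width is forced to 8 so that a patch and its gap can still share one long (the comment above states the rationale). Second, the gap-only filler entries are now built with a long shift, 255L << patchWidth, because the old int expression (255 << patchWidth) | 0 goes wrong once patchWidth reaches 24: the int result turns negative and sign-extends when widened to long (and at 32 or more the shift distance is silently taken modulo 32). A small standalone sketch of that int-versus-long shift difference, with an arbitrary width:

    public class ShiftPromotionSketch {
      public static void main(String[] args) {
        int patchWidth = 28;                          // hypothetical width
        long bad  = (255 << patchWidth) | 0;          // int shift, then sign-extended
        long good = 255L << patchWidth;               // long shift keeps only the intended bits
        System.out.println(Long.toHexString(bad));    // fffffffff0000000
        System.out.println(Long.toHexString(good));   // ff0000000
      }
    }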
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index d904c44..27a9338 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -151,16 +151,16 @@ public VectorizedOrcInputFormat() {
if(!(fSplit instanceof OrcSplit)){
//If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
} else {
//We have OrcSplit, which may have footer metadata cached, so use the appropriate reader
//constructor
OrcSplit orcSplit = (OrcSplit) fSplit;
if (orcSplit.hasFooter()) {
FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo();
- reader = OrcFile.createReader(fs, path, fMetaInfo);
+ reader = OrcFile.createReader(fs, path, fMetaInfo, conf);
} else {
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
}
}
@@ -176,7 +176,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf,
}
for (FileStatus file : files) {
try {
- OrcFile.createReader(fs, file.getPath());
+ OrcFile.createReader(fs, file.getPath(), conf);
} catch (IOException e) {
return false;
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
index d8106fb..591ec3f 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
@@ -22,6 +22,7 @@
import java.nio.ByteBuffer;
import java.util.Random;
+import org.apache.hadoop.conf.Configuration;
import org.junit.Test;
public class TestIntegerCompressionReader {
@@ -57,7 +58,8 @@ public void runSeekTest(CompressionCodec codec) throws Exception {
new RunLengthIntegerReaderV2(InStream.create
("test", new ByteBuffer[]{inBuf},
new long[]{0}, inBuf.remaining(),
- codec, 1000), true);
+ codec, 1000), true,
+ new Configuration());
for(int i=0; i < 2048; ++i) {
int x = (int) in.next();
if (i < 1024) {
@@ -112,7 +114,8 @@ public void testSkips() throws Exception {
new ByteBuffer[]{inBuf},
new long[]{0},
inBuf.remaining(),
- null, 100), true);
+ null, 100), true,
+ new Configuration());
for(int i=0; i < 2048; i += 10) {
int x = (int) in.next();
if (i < 1024) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
index 1a87c0a..b9899f7 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
@@ -191,7 +191,7 @@ public void testNewOutputFormat() throws Exception {
Path outputFilePath = new Path(outputPath, "part-m-00000");
assertTrue(localFs.exists(outputFilePath));
- Reader reader = OrcFile.createReader(localFs, outputFilePath);
+ Reader reader = OrcFile.createReader(localFs, outputFilePath, conf);
assertTrue(reader.getNumberOfRows() == rownum);
assertEquals(reader.getCompression(), CompressionKind.ZLIB);
StructObjectInspector soi =
@@ -248,7 +248,7 @@ public void testNewOutputFormatWithCompression() throws Exception {
assertTrue(result);
Path outputFilePath = new Path(outputPath, "part-m-00000");
- Reader reader = OrcFile.createReader(localFs, outputFilePath);
+ Reader reader = OrcFile.createReader(localFs, outputFilePath, conf);
assertEquals(reader.getCompression(), CompressionKind.SNAPPY);
localFs.delete(outputPath, true);
@@ -351,7 +351,7 @@ public void testNewOutputFormatComplex() throws Exception {
assertTrue(result);
Path outputFilePath = new Path(outputPath, "part-r-00000");
- Reader reader = OrcFile.createReader(localFs, outputFilePath);
+ Reader reader = OrcFile.createReader(localFs, outputFilePath, conf);
RecordReader rows = reader.rows(null);
ObjectInspector orcOi = reader.getObjectInspector();
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
index dbb7641..8f68acc 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
@@ -128,7 +128,7 @@ public void testBasicRow() throws Exception {
writer.addRow(new Row(111, 1111L));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
while (rows.hasNext()) {
Object row = rows.next(null);
@@ -162,7 +162,7 @@ public void testBasicOld() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -197,7 +197,7 @@ public void testBasicNew() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -228,7 +228,7 @@ public void testBasicDelta1() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -259,7 +259,7 @@ public void testBasicDelta2() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -290,7 +290,7 @@ public void testBasicDelta3() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -321,7 +321,7 @@ public void testBasicDelta4() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -351,7 +351,7 @@ public void testIntegerMin() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -382,7 +382,7 @@ public void testIntegerMax() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -413,7 +413,7 @@ public void testLongMin() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -444,7 +444,7 @@ public void testLongMax() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -478,7 +478,7 @@ public void testRandomInt() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -512,7 +512,7 @@ public void testRandomLong() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -554,7 +554,7 @@ public void testPatchedBaseNegativeMin() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -596,7 +596,7 @@ public void testPatchedBaseNegativeMin2() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -638,7 +638,7 @@ public void testPatchedBaseNegativeMin3() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -671,7 +671,7 @@ public void testPatchedBaseNegativeMin4() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -706,7 +706,7 @@ public void testPatchedBaseAt0() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -741,7 +741,7 @@ public void testPatchedBaseAt1() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -775,7 +775,7 @@ public void testPatchedBaseAt255() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -809,7 +809,7 @@ public void testPatchedBaseAt256() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -843,7 +843,7 @@ public void testPatchedBase510() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -877,7 +877,166 @@ public void testPatchedBase511() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+ RecordReader rows = reader.rows(null);
+ int idx = 0;
+ while (rows.hasNext()) {
+ Object row = rows.next(null);
+ assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+ }
+ }
+
+ @Test
+ public void testPatchedBaseMax1() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+
+    List<Long> input = Lists.newArrayList();
+ Random rand = new Random();
+ for (int i = 0; i < 5120; i++) {
+ input.add((long) rand.nextInt(60));
+ }
+ input.set(511, Long.MAX_VALUE);
+
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+ for (Long l : input) {
+ writer.addRow(l);
+ }
+ writer.close();
+
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+ RecordReader rows = reader.rows(null);
+ int idx = 0;
+ while (rows.hasNext()) {
+ Object row = rows.next(null);
+ assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+ }
+ }
+
+ @Test
+ public void testPatchedBaseMax2() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+
+    List<Long> input = Lists.newArrayList();
+ Random rand = new Random();
+ for (int i = 0; i < 5120; i++) {
+ input.add((long) rand.nextInt(60));
+ }
+ input.set(128, Long.MAX_VALUE);
+ input.set(256, Long.MAX_VALUE);
+ input.set(511, Long.MAX_VALUE);
+
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+ for (Long l : input) {
+ writer.addRow(l);
+ }
+ writer.close();
+
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+ RecordReader rows = reader.rows(null);
+ int idx = 0;
+ while (rows.hasNext()) {
+ Object row = rows.next(null);
+ assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+ }
+ }
+
+ @Test
+ public void testPatchedBaseMax3() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+
+    List<Long> input = Lists.newArrayList();
+ input.add(371946367L);
+ input.add(11963367L);
+ input.add(68639400007L);
+ input.add(100233367L);
+ input.add(6367L);
+ input.add(10026367L);
+ input.add(3670000L);
+ input.add(3602367L);
+ input.add(4719226367L);
+ input.add(7196367L);
+ input.add(444442L);
+ input.add(210267L);
+ input.add(21033L);
+ input.add(160267L);
+ input.add(400267L);
+ input.add(23634347L);
+ input.add(16027L);
+ input.add(46026367L);
+ input.add(Long.MAX_VALUE);
+ input.add(33333L);
+
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+ for (Long l : input) {
+ writer.addRow(l);
+ }
+ writer.close();
+
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+ RecordReader rows = reader.rows(null);
+ int idx = 0;
+ while (rows.hasNext()) {
+ Object row = rows.next(null);
+ assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+ }
+ }
+
+ @Test
+ public void testPatchedBaseMax4() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+
+    List<Long> input = Lists.newArrayList();
+ for (int i = 0; i < 25; i++) {
+ input.add(371292224226367L);
+ input.add(119622332222267L);
+ input.add(686329400222007L);
+ input.add(100233333222367L);
+ input.add(636272333322222L);
+ input.add(10202633223267L);
+ input.add(36700222022230L);
+ input.add(36023226224227L);
+ input.add(47192226364427L);
+ input.add(71963622222447L);
+ input.add(22244444222222L);
+ input.add(21220263327442L);
+ input.add(21032233332232L);
+ input.add(16026322232227L);
+ input.add(40022262272212L);
+ input.add(23634342227222L);
+ input.add(16022222222227L);
+ input.add(46026362222227L);
+ input.add(46026362222227L);
+ input.add(33322222222323L);
+ }
+ input.add(Long.MAX_VALUE);
+
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+ for (Long l : input) {
+ writer.addRow(l);
+ }
+ writer.close();
+
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -938,7 +1097,7 @@ public void testPatchedBaseTimestamp() throws Exception {
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 0;
while (rows.hasNext()) {
@@ -966,7 +1125,7 @@ public void testDirectLargeNegatives() throws Exception {
writer.addRow(-5535739865598783616L);
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
Object row = rows.next(null);
assertEquals(-7486502418706614742L, ((LongWritable) row).get());
@@ -1005,7 +1164,7 @@ public void testSeek() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
int idx = 55555;
rows.seekToRow(idx);
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 03fc705..4d3013d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -207,7 +207,7 @@ public void openFileSystem () throws Exception {
@Test
public void testReadFormat_0_11() throws Exception {
Path oldFilePath = new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
- Reader reader = OrcFile.createReader(fs, oldFilePath);
+ Reader reader = OrcFile.createReader(fs, oldFilePath, conf);
int stripeCount = 0;
int rowCount = 0;
@@ -470,7 +470,7 @@ public void testStringAndBinaryStatistics() throws Exception {
writer.addRow(new SimpleStruct(bytes(0,1,2,3,4,5), null));
writer.addRow(new SimpleStruct(null, "hi"));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
@@ -565,7 +565,7 @@ public void testStripeLevelStats() throws Exception {
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
Metadata metadata = reader.getMetadata();
int numStripes = metadata.getStripeStatistics().size();
assertEquals(3, numStripes);
@@ -643,7 +643,7 @@ public void test1() throws Exception {
list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
map(inner(5, "chani"), inner(1, "mauddib"))));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
Metadata metadata = reader.getMetadata();
@@ -893,7 +893,7 @@ public void columnProjection() throws Exception {
writer.addRow(inner(x, y));
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// check out the statistics
ColumnStatistics[] stats = reader.getStatistics();
@@ -957,7 +957,7 @@ public void emptyFile() throws Exception {
.compress(CompressionKind.NONE)
.bufferSize(100));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(false, reader.rows(null).hasNext());
assertEquals(CompressionKind.NONE, reader.getCompression());
assertEquals(0, reader.getNumberOfRows());
@@ -994,7 +994,7 @@ public void metaData() throws Exception {
null, null, null, null));
writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(byteBuf(5,7,11,13,17,19), reader.getMetadataValue("clobber"));
assertEquals(byteBuf(1,2,3,4,5,6,7,-1,-2,127,-128),
reader.getMetadataValue("my.meta"));
@@ -1112,7 +1112,7 @@ public void testUnionAndTimestamp() throws Exception {
union.set((byte) 0, new IntWritable(138));
writer.addRow(row);
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
assertEquals(5309, reader.getNumberOfRows());
DecimalColumnStatistics stats =
@@ -1243,7 +1243,7 @@ public void testSnappy() throws Exception {
Integer.toHexString(rand.nextInt())));
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);
rand = new Random(12);
OrcStruct row = null;
@@ -1286,7 +1286,7 @@ public void testWithoutIndex() throws Exception {
}
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(50000, reader.getNumberOfRows());
assertEquals(0, reader.getRowIndexStride());
StripeInformation stripe = reader.getStripes().iterator().next();
@@ -1350,7 +1350,7 @@ public void testSeek() throws Exception {
}
writer.close();
writer = null;
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(COUNT, reader.getNumberOfRows());
RecordReader rows = reader.rows(null);
OrcStruct row = null;
@@ -1517,7 +1517,7 @@ public void testMemoryManagement() throws Exception {
}
writer.close();
assertEquals(null, memory.path);
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
int i = 0;
for(StripeInformation stripe: reader.getStripes()) {
i += 1;
@@ -1542,7 +1542,7 @@ public void testPredicatePushdown() throws Exception {
writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
}
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(3500, reader.getNumberOfRows());
SearchArgument sarg = SearchArgument.FACTORY.newBuilder()
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
index 492bb00..9952bff 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
@@ -108,7 +108,7 @@ public void testMultiStripeWithNull() throws Exception {
Lists.newArrayList(new InnerStruct(100))));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(20000, reader.getNumberOfRows());
@@ -212,7 +212,7 @@ public void testMultiStripeWithoutNull() throws Exception {
Lists.newArrayList(new InnerStruct(100))));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(20000, reader.getNumberOfRows());
@@ -313,7 +313,7 @@ public void testColumnsWithNullAndCompression() throws Exception {
Lists.newArrayList(new InnerStruct(100))));
writer.close();
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(8, reader.getNumberOfRows());
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
index 6aba386..0732534 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
@@ -212,7 +212,7 @@ public void testStringAndBinaryStatistics() throws Exception {
writer.close();
assertEquals(4, writer.getNumberOfRows());
assertEquals(273, writer.getRawDataSize());
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(4, reader.getNumberOfRows());
assertEquals(273, reader.getRawDataSize());
assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
@@ -310,7 +310,7 @@ public void testOrcSerDeStatsList() throws Exception {
assertEquals(5000, writer.getNumberOfRows());
assertEquals(430000000, writer.getRawDataSize());
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// stats from reader
assertEquals(5000, reader.getNumberOfRows());
assertEquals(430000000, reader.getRawDataSize());
@@ -341,7 +341,7 @@ public void testOrcSerDeStatsMap() throws Exception {
assertEquals(1000, writer.getNumberOfRows());
assertEquals(950000, writer.getRawDataSize());
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// stats from reader
assertEquals(1000, reader.getNumberOfRows());
assertEquals(950000, reader.getRawDataSize());
@@ -372,7 +372,7 @@ public void testOrcSerDeStatsSimpleWithNulls() throws Exception {
assertEquals(1000, writer.getNumberOfRows());
assertEquals(44500, writer.getRawDataSize());
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// stats from reader
assertEquals(1000, reader.getNumberOfRows());
assertEquals(44500, reader.getRawDataSize());
@@ -413,7 +413,7 @@ public void testOrcSerDeStatsComplex() throws Exception {
long rawDataSize = writer.getRawDataSize();
assertEquals(2, rowCount);
assertEquals(1740, rawDataSize);
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(2, reader.getNumberOfRows());
assertEquals(1740, reader.getRawDataSize());
@@ -506,7 +506,7 @@ public void testOrcSerDeStatsComplexOldFormat() throws Exception {
long rawDataSize = writer.getRawDataSize();
assertEquals(2, rowCount);
assertEquals(1740, rawDataSize);
- Reader reader = OrcFile.createReader(fs, testFilePath);
+ Reader reader = OrcFile.createReader(fs, testFilePath, conf);
assertEquals(2, reader.getNumberOfRows());
assertEquals(1740, reader.getRawDataSize());
@@ -573,7 +573,7 @@ public void testOrcSerDeStatsComplexOldFormat() throws Exception {
@Test(expected = ClassCastException.class)
public void testSerdeStatsOldFormat() throws Exception {
Path oldFilePath = new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
- Reader reader = OrcFile.createReader(fs, oldFilePath);
+ Reader reader = OrcFile.createReader(fs, oldFilePath, conf);
int stripeCount = 0;
int rowCount = 0;
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index 23d89df..10534c0 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -131,8 +131,8 @@ public void createFile() throws Exception {
private void checkVectorizedReader() throws Exception {
- Reader vreader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath);
- Reader reader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath);
+ Reader vreader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath, conf);
+ Reader reader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath, conf);
RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows(null);
RecordReaderImpl rr = (RecordReaderImpl) reader.rows(null);
VectorizedRowBatch batch = null;