diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 96ac584..82364f8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,11 +18,17 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.io.NullWritable;
@@ -33,15 +39,13 @@
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 /**
  * A MapReduce/Hive input format for ORC files.
  */
 public class OrcInputFormat  extends FileInputFormat<NullWritable, OrcStruct>
-  implements InputFormatChecker {
+  implements InputFormatChecker, VectorizedInputFormatInterface {
+
+  private final VectorizedOrcInputFormat voif = new VectorizedOrcInputFormat();
 
   private static class OrcRecordReader
       implements RecordReader<NullWritable, OrcStruct> {
@@ -161,6 +165,15 @@ private static void includeColumnRecursive(List<OrcProto.Type> types,
   public RecordReader<NullWritable, OrcStruct>
       getRecordReader(InputSplit inputSplit, JobConf conf,
                       Reporter reporter) throws IOException {
+
+    boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(),
+        false);
+    if (vectorPath) {
+      RecordReader<NullWritable, VectorizedRowBatch> vorr = voif.getRecordReader(inputSplit, conf,
+          reporter);
+      return (RecordReader) vorr;
+    }
+
     FileSplit fileSplit = (FileSplit) inputSplit;
     Path path = fileSplit.getPath();
     FileSystem fs = path.getFileSystem(conf);
@@ -173,6 +186,13 @@ private static void includeColumnRecursive(List<OrcProto.Type> types,
   public boolean validateInput(FileSystem fs, HiveConf conf,
                                ArrayList<FileStatus> files
                               ) throws IOException {
+    boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(),
+        false);
+
+    if (vectorPath) {
+      return voif.validateInput(fs, conf, files);
+    }
+
     if (files.size() <= 0) {
       return false;
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index c28e722..ff96d39 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -50,7 +50,7 @@
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.ql.io.orc.CommonOrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
 import org.apache.hadoop.hive.ql.lib.Node;
@@ -114,7 +114,7 @@
       .getName();
   protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class
       .getName();
-  protected static final String ORCFILE_INPUT = CommonOrcInputFormat.class
+  protected static final String ORCFILE_INPUT = OrcInputFormat.class
       .getName();
   protected static final String ORCFILE_OUTPUT = OrcOutputFormat.class
       .getName();