diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java deleted file mode 100644 index 425c12d..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/CommonOrcInputFormat.java +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.orc; - -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.io.InputFormatChecker; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; - - -public class CommonOrcInputFormat extends FileInputFormat - implements InputFormatChecker, VectorizedInputFormatInterface { - - OrcInputFormat oif = new OrcInputFormat(); - VectorizedOrcInputFormat voif = new VectorizedOrcInputFormat(); - - private static class CommonOrcRecordReader - implements RecordReader { - - final RecordReader vorr; - final RecordReader orr; - - public CommonOrcRecordReader(RecordReader vorr, - RecordReader orr) { - this.vorr = vorr; - this.orr = orr; - } - - @Override - public void close() throws IOException { - if (vorr != null) { - vorr.close(); - } else { - orr.close(); - } - - } - - @Override - public NullWritable createKey() { - if (vorr != null) { - return vorr.createKey(); - } else { - return orr.createKey(); - } - } - - @Override - public Writable createValue() { - if (vorr != null) { - return vorr.createValue(); - } else { - return orr.createValue(); - } - } - - @Override - public long getPos() throws IOException { - if (vorr != null) { - return vorr.getPos(); - } else { - return orr.getPos(); - } - } - - @Override - public float getProgress() throws IOException { - if (vorr != null) { - return vorr.getProgress(); - } else { - return orr.getProgress(); - } - } - - @Override - public boolean next(NullWritable arg0, Writable arg1) throws IOException { - if (vorr != null) { - return vorr.next(arg0, (VectorizedRowBatch) arg1); - } else { - return orr.next(arg0, (OrcStruct) arg1); - } - } - - } - - @Override - public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList files) - throws IOException { - boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(), - false); - if (vectorPath) { - return voif.validateInput(fs, conf, files); - } else { - return oif.validateInput(fs, conf, files); - } - } - - @Override - public RecordReader getRecordReader(InputSplit split, JobConf conf, - Reporter reporter) throws IOException { - boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(), - false); - if (vectorPath) { - RecordReader vorr = voif.getRecordReader(split, conf, - reporter); - return new CommonOrcRecordReader(vorr, null); - } else { - RecordReader orr = oif.getRecordReader(split, conf, reporter); - return new CommonOrcRecordReader(null, orr); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 96ac584..82364f8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -18,11 +18,17 @@ package org.apache.hadoop.hive.ql.io.orc; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.NullWritable; @@ -33,15 +39,13 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - /** * A MapReduce/Hive input format for ORC files. */ public class OrcInputFormat extends FileInputFormat - implements InputFormatChecker { + implements InputFormatChecker, VectorizedInputFormatInterface { + + VectorizedOrcInputFormat voif = new VectorizedOrcInputFormat(); private static class OrcRecordReader implements RecordReader { @@ -51,6 +55,7 @@ private final int numColumns; private float progress = 0.0f; + OrcRecordReader(Reader file, Configuration conf, long offset, long length) throws IOException { this.reader = file.rows(offset, length, @@ -161,6 +166,15 @@ private static void includeColumnRecursive(List types, public RecordReader getRecordReader(InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException { + + boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(), + false); + if (vectorPath) { + RecordReader vorr = voif.getRecordReader(inputSplit, conf, + reporter); + return (RecordReader) vorr; + } + FileSplit fileSplit = (FileSplit) inputSplit; Path path = fileSplit.getPath(); FileSystem fs = path.getFileSystem(conf); @@ -173,6 +187,13 @@ private static void includeColumnRecursive(List types, public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList files ) throws IOException { + boolean vectorPath = conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.toString(), + false); + + if (vectorPath) { + return voif.validateInput(fs, conf, files); + } + if (files.size() <= 0) { return false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index c28e722..ff96d39 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -50,7 +50,7 @@ import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.CommonOrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.lib.Node; @@ -114,7 +114,7 @@ .getName(); protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class .getName(); - protected static final String ORCFILE_INPUT = CommonOrcInputFormat.class + protected static final String ORCFILE_INPUT = OrcInputFormat.class .getName(); protected static final String ORCFILE_OUTPUT = OrcOutputFormat.class .getName();