Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1540411)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -510,6 +510,14 @@
     // Define the default ORC stripe size
     HIVE_ORC_DEFAULT_STRIPE_SIZE("hive.exec.orc.default.stripe.size",
         256L * 1024 * 1024),
+    // Define the default ORC row index stride
+    HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride", null),
+    // Define the default ORC buffer size
+    HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", null),
+    // Define the default ORC block padding
+    HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", null),
+    // Define the default ORC compression codec
+    HIVE_ORC_DEFAULT_COMPRESS("hive.exec.orc.default.compress", null),
 
     HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold",
         0.8f),
 
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java	(revision 1540411)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java	(working copy)
@@ -147,6 +147,18 @@
       stripeSizeValue =
           conf.getLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname,
               DEFAULT_STRIPE_SIZE);
+      rowIndexStrideValue =
+          conf.getInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE.varname,
+              DEFAULT_ROW_INDEX_STRIDE);
+      bufferSizeValue =
+          conf.getInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE.varname,
+              DEFAULT_BUFFER_SIZE);
+      blockPaddingValue =
+          conf.getBoolean(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING.varname,
+              DEFAULT_BLOCK_PADDING);
+      compressValue =
+          CompressionKind.valueOf(conf.get(HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname,
+              DEFAULT_COMPRESSION_KIND.toString()));
       String versionName = conf.get(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname);
       if (versionName == null) {
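The four new properties let a client that builds ORC writers through OrcFile.writerOptions(conf) tune the writer without going through Hive table properties. A usage sketch, not part of the patch; the property names come from the change above, while the concrete values, the output path, and the "inspector" variable are only illustrative, and anything left unset falls back to the DEFAULT_* constants read above:

    Configuration conf = new Configuration();
    conf.setLong("hive.exec.orc.default.stripe.size", 64L * 1024 * 1024);
    conf.setInt("hive.exec.orc.default.row.index.stride", 10000);
    conf.setInt("hive.exec.orc.default.buffer.size", 256 * 1024);
    conf.setBoolean("hive.exec.orc.default.block.padding", true);
    conf.set("hive.exec.orc.default.compress", "ZLIB");
    // the caller still supplies an ObjectInspector and a target path as usual
    Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
        OrcFile.writerOptions(conf).inspector(inspector));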
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java	(revision 1540411)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java	(working copy)
@@ -93,10 +93,7 @@
                     long offset, long length) throws IOException {
       List<OrcProto.Type> types = file.getTypes();
       numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
-      boolean[] includedColumns = findIncludedColumns(types, conf);
-      String[] columnNames = getIncludedColumnNames(types, includedColumns, conf);
-      SearchArgument sarg = createSarg(types, conf);
-      this.reader = file.rows(offset, length, includedColumns, sarg, columnNames);
+      this.reader = createReaderFromFile(file, conf, offset, length);
       this.offset = offset;
       this.length = length;
     }
@@ -137,6 +134,15 @@
       return progress;
     }
   }
+
+  static org.apache.hadoop.hive.ql.io.orc.RecordReader createReaderFromFile(Reader file, Configuration conf, long offset, long length) throws IOException {
+    List<OrcProto.Type> types = file.getTypes();
+    boolean[] includedColumns = findIncludedColumns(types, conf);
+    String[] columnNames = getIncludedColumnNames(types, includedColumns, conf);
+    SearchArgument sarg = createSarg(types, conf);
+    org.apache.hadoop.hive.ql.io.orc.RecordReader reader = file.rows(offset, length, includedColumns, sarg, columnNames);
+    return reader;
+  }
 
   private static final PathFilter hiddenFileFilter = new PathFilter(){
     public boolean accept(Path p){
@@ -278,7 +284,7 @@
    * @param conf The configuration of the job
    * @return the list of input {@link Path}s for the map-reduce job.
    */
-  static Path[] getInputPaths(JobConf conf) throws IOException {
+  static Path[] getInputPaths(Configuration conf) throws IOException {
     String dirs = conf.get("mapred.input.dir");
     if (dirs == null) {
       throw new IOException("Configuration mapred.input.dir is not defined.");
@@ -296,8 +302,32 @@
    * the different worker threads.
    */
   static class Context {
+    static class FileSplitInfo {
+      FileSplitInfo(Path file, long start, long length, String[] hosts) {
+        this.file = file;
+        this.start = start;
+        this.length = length;
+        this.hosts = hosts;
+      }
+      Path getPath() {
+        return file;
+      }
+      long getStart() {
+        return start;
+      }
+      long getLength() {
+        return length;
+      }
+      String[] getLocations() {
+        return hosts;
+      }
+      private Path file;
+      private long start;
+      private long length;
+      private String[] hosts;
+    }
     private final ExecutorService threadPool = Executors.newFixedThreadPool(10);
-    private final List<FileSplit> splits = new ArrayList<FileSplit>(10000);
+    private final List<FileSplitInfo> splits = new ArrayList<FileSplitInfo>(10000);
     private final List<Throwable> errors = new ArrayList<Throwable>();
     private final HadoopShims shims = ShimLoader.getHadoopShims();
     private final Configuration conf;
@@ -326,7 +356,7 @@
      * the back.
      * @result the Nth file split
      */
-    FileSplit getResult(int index) {
+    FileSplitInfo getResult(int index) {
       if (index >= 0) {
         return splits.get(index);
       } else {
@@ -518,8 +548,8 @@
         hostList.toArray(hosts);
       }
       synchronized (context.splits) {
-        context.splits.add(new FileSplit(file.getPath(), offset, length,
-            hosts));
+        context.splits.add(new Context.FileSplitInfo(file.getPath(), offset, length,
+            hosts));
       }
     }
 
@@ -564,31 +594,39 @@
     }
   }
 
-  @Override
-  public InputSplit[] getSplits(JobConf job,
-                                int numSplits) throws IOException {
+  static List<Context.FileSplitInfo> generateSplitsInfo(Configuration conf) throws IOException {
     // use threads to resolve directories into splits
-    Context context = new Context(job);
-    for(Path dir: getInputPaths(job)) {
-      FileSystem fs = dir.getFileSystem(job);
+    Context context = new Context(conf);
+    for(Path dir: getInputPaths(conf)) {
+      FileSystem fs = dir.getFileSystem(conf);
       context.schedule(new FileGenerator(context, fs, dir));
     }
     context.waitForTasks();
     // deal with exceptions
-    if (!context.errors.isEmpty()) {
+    if (!context.getErrors().isEmpty()) {
      List<IOException> errors =
-          new ArrayList<IOException>(context.errors.size());
-      for(Throwable th: context.errors) {
+          new ArrayList<IOException>(context.getErrors().size());
+      for(Throwable th: context.getErrors()) {
        if (th instanceof IOException) {
          errors.add((IOException) th);
-        } else {
-          throw new IOException("serious problem", th);
+        } else {
+          throw new IOException("serious problem", th);
+        }
      }
+      throw new InvalidInputException(errors);
    }
-    throw new InvalidInputException(errors);
+    return context.splits;
  }
-    InputSplit[] result = new InputSplit[context.splits.size()];
-    context.splits.toArray(result);
+
+  @Override
+  public InputSplit[] getSplits(JobConf job,
+                                int numSplits) throws IOException {
+    List<Context.FileSplitInfo> splits = OrcInputFormat.generateSplitsInfo(job);
+    InputSplit[] result = new InputSplit[splits.size()];
+    for (int i = 0; i < splits.size(); i++) {
+      Context.FileSplitInfo split = splits.get(i);
+      result[i] = new FileSplit(split.getPath(), split.getStart(),
+          split.getLength(), split.getLocations());
+    }
     return result;
   }
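createReaderFromFile is the piece both record readers now share: it applies the column projection and any SearchArgument from the job configuration and hands back a row-level ORC reader. Roughly, each record reader does the equivalent of the sketch below; the local variable names are illustrative, the calls are the ones used in this patch:

    // obtain an ORC row reader for one split; pruning and predicate pushdown come from conf
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    Reader orcReader = OrcFile.createReader(fs, path);
    org.apache.hadoop.hive.ql.io.orc.RecordReader rows =
        OrcInputFormat.createReaderFromFile(orcReader, conf,
            fileSplit.getStart(), fileSplit.getLength());

Because the helper is package-private, it is only reachable from classes in the same package, such as the new OrcNewInputFormat below.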
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java	(working copy)
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+
+/** An InputFormat for ORC files. */
+public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct> {
+
+  @Override
+  public RecordReader<NullWritable, OrcStruct> createRecordReader(InputSplit inputSplit,
+      TaskAttemptContext context) throws IOException, InterruptedException {
+    FileSplit fileSplit = (FileSplit) inputSplit;
+    Path path = fileSplit.getPath();
+    FileSystem fs = path.getFileSystem(context.getConfiguration());
+    return new OrcRecordReader(OrcFile.createReader(fs, path), context.getConfiguration(),
+        fileSplit.getStart(), fileSplit.getLength());
+  }
+
+  private static class OrcRecordReader
+      extends RecordReader<NullWritable, OrcStruct> {
+    private final org.apache.hadoop.hive.ql.io.orc.RecordReader reader;
+    private final int numColumns;
+    OrcStruct value;
+    private float progress = 0.0f;
+
+    OrcRecordReader(Reader file, Configuration conf,
+                    long offset, long length) throws IOException {
+      List<OrcProto.Type> types = file.getTypes();
+      numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
+      value = new OrcStruct(numColumns);
+      this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length);
+    }
+
+    @Override
+    public void close() throws IOException {
+      reader.close();
+    }
+
+    @Override
+    public NullWritable getCurrentKey() throws IOException, InterruptedException {
+      return NullWritable.get();
+    }
+
+    @Override
+    public OrcStruct getCurrentValue() throws IOException, InterruptedException {
+      return value;
+    }
+
+    @Override
+    public float getProgress() throws IOException, InterruptedException {
+      return progress;
+    }
+
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
+        InterruptedException {
+    }
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      if (reader.hasNext()) {
+        reader.next(value);
+        progress = reader.getProgress();
+        return true;
+      } else {
+        return false;
+      }
+    }
+  }
+
+  @Override
+  public List<InputSplit> getSplits(JobContext jobContext) throws IOException,
+      InterruptedException {
+    List<OrcInputFormat.Context.FileSplitInfo> splits =
+        OrcInputFormat.generateSplitsInfo(jobContext.getConfiguration());
+    List<InputSplit> result = new ArrayList<InputSplit>();
+    for (OrcInputFormat.Context.FileSplitInfo split : splits) {
+      FileSplit newSplit = new FileSplit(split.getPath(), split.getStart(),
+          split.getLength(), split.getLocations());
+      result.add(newSplit);
+    }
+    return result;
+  }
+
+}
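For callers on the new org.apache.hadoop.mapreduce API, wiring the input format into a job looks like the sketch below, mirroring the tests further down; the mapper class, job name, and paths are placeholders. The mapper receives NullWritable keys and OrcStruct values:

    Job job = new Job(conf, "orc read");
    job.setInputFormatClass(OrcNewInputFormat.class);
    job.setMapperClass(MyOrcMapper.class);  // consumes NullWritable / OrcStruct pairs
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path("/data/in.orc"));
    FileOutputFormat.setOutputPath(job, new Path("/data/out"));
    job.waitForCompletion(true);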
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewOutputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewOutputFormat.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewOutputFormat.java	(working copy)
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+/** An OutputFormat that writes ORC files. */
+public class OrcNewOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow> {
+
+  private static class OrcRecordWriter
+      extends RecordWriter<NullWritable, OrcSerdeRow> {
+    private Writer writer = null;
+    private final Path path;
+    private final OrcFile.WriterOptions options;
+
+    OrcRecordWriter(Path path, OrcFile.WriterOptions options) {
+      this.path = path;
+      this.options = options;
+    }
+
+    @Override
+    public void write(NullWritable key, OrcSerdeRow row)
+        throws IOException, InterruptedException {
+      if (writer == null) {
+        options.inspector(row.getInspector());
+        writer = OrcFile.createWriter(path, options);
+      }
+      writer.addRow(row.getRow());
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+      if (writer == null) {
+        // a row with no columns
+        ObjectInspector inspector = ObjectInspectorFactory.
+            getStandardStructObjectInspector(new ArrayList<String>(),
+                new ArrayList<ObjectInspector>());
+        options.inspector(inspector);
+        writer = OrcFile.createWriter(path, options);
+      }
+      writer.close();
+    }
+  }
+
+  @Override
+  public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(TaskAttemptContext context)
+      throws IOException, InterruptedException {
+    Path file = getDefaultWorkFile(context, "");
+    return new
+        OrcRecordWriter(file, OrcFile.writerOptions(context.getConfiguration()));
+  }
+}
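The output format expects NullWritable keys and OrcSerdeRow values, so producers serialize each row through OrcSerde before writing, as the tests below do. A condensed sketch of the per-record work inside a mapper or reducer; the struct type string and field values are placeholders:

    OrcSerde serde = new OrcSerde();
    TypeInfo typeInfo =
        TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string>");
    ObjectInspector inspector =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    List<Object> fields = new ArrayList<Object>();
    fields.add(1);
    fields.add("one");
    context.write(NullWritable.get(), serde.serialize(fields, inspector));

On the job side the relevant settings are job.setOutputFormatClass(OrcNewOutputFormat.class), job.setOutputKeyClass(NullWritable.class), and job.setOutputValueClass(OrcSerdeRow.class).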
Index: ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java	(revision 1540411)
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java	(working copy)
@@ -427,7 +427,7 @@
         new OrcInputFormat.SplitGenerator(context, fs,
             fs.getFileStatus(new Path("/a/file")));
     splitter.createSplit(0, 200);
-    FileSplit result = context.getResult(-1);
+    OrcInputFormat.Context.FileSplitInfo result = context.getResult(-1);
     assertEquals(0, result.getStart());
     assertEquals(200, result.getLength());
     assertEquals("/a/file", result.getPath().toString());
@@ -476,7 +476,7 @@
       }
       throw new IOException("Errors during splitting");
     }
-    FileSplit result = context.getResult(0);
+    OrcInputFormat.Context.FileSplitInfo result = context.getResult(0);
     assertEquals(3, result.getStart());
     assertEquals(497, result.getLength());
     result = context.getResult(1);
Index: ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java	(revision 0)
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java	(working copy)
@@ -0,0 +1,403 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertTrue;
+import static junit.framework.Assert.assertFalse;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.UUID;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestNewInputOutputFormat {
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem localFs;
+
+  @Before
+  public void setup() throws Exception {
+    conf = new Configuration();
+    conf.set("mapred.job.tracker", "local");
+    conf.set("fs.default.name", "local");
+    localFs = FileSystem.get(conf);
+  }
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  public static class OrcTestMapper1 extends Mapper {
+    @Override
+    public void map(Object key, Writable value, Context context)
+        throws IOException, InterruptedException {
+      context.write(null, new Text(value.toString()));
+    }
+  }
+
+  @Test
+  // Test regular inputformat
+  public void testNewInputFormat() throws Exception {
+    Job job = new Job(conf, "orc test");
+    job.setInputFormatClass(OrcNewInputFormat.class);
+    job.setJarByClass(TestNewInputOutputFormat.class);
+    job.setMapperClass(OrcTestMapper1.class);
+    job.setNumReduceTasks(0);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(IntWritable.class);
+    FileInputFormat.addInputPath(job,
+        new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc")));
+ testCaseName.getMethodName() + ".txt"); + localFs.delete(outputPath, true); + FileOutputFormat.setOutputPath(job, outputPath); + boolean result = job.waitForCompletion(true); + assertTrue(result); + Path outputFilePath = new Path(outputPath, "part-m-00000"); + + assertTrue(localFs.exists(outputFilePath)); + BufferedReader reader = new BufferedReader(new InputStreamReader(localFs.open(outputFilePath))); + int count=0; + String line; + String lastLine=null; + while ((line=reader.readLine()) != null) { + count++; + lastLine = line; + } + reader.close(); + assertEquals(count, 7500); + assertEquals(lastLine, "{true, 100, 2048, 65536, 9223372036854775807, 2.0, -5.0" + + ", , bye, {[{1, bye}, {2, sigh}]}, [{100000000, cat}, {-100000, in}, {1234, hat}]," + + " {chani={5, chani}, mauddib={1, mauddib}}, 2000-03-12 15:00:01.0, 12345678.6547457}"); + localFs.delete(outputPath, true); + } + + public static class OrcTestMapper2 extends Mapper { + private final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct"); + private final ObjectInspector oip = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo); + private final OrcSerde serde = new OrcSerde(); + private Writable row; + @Override + public void map(Object key, Text value, Context context) + throws IOException, InterruptedException { + String[] items = value.toString().split(","); + List struct = new ArrayList(2); + struct.add(0, Integer.parseInt(items[0])); + struct.add(1, items[1]); + row = serde.serialize(struct, oip); + context.write(null, row); + } + } + + @Test + //Test regular outputformat + public void testNewOutputFormat() throws Exception { + int rownum=1000; + + Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); + Path outputPath = new Path(workDir, "TestOrcFile." 
+ testCaseName.getMethodName() + ".orc"); + localFs.delete(outputPath, true); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(localFs.create(inputPath))); + Random r = new Random(1000L); + boolean firstRow = true; + int firstIntValue = 0; + String firstStringValue = null; + for (int i=0;i { + @Override + public void map(Object key, Text value, Context context) + throws IOException, InterruptedException { + String items[] = value.toString().split("\\s+"); + context.write(new IntWritable(items.length), value); + } + } + + public static class OrcTestReducer3 extends + Reducer { + final static TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( + "struct>," + + "wordcounts:map>"); + private final ObjectInspector oip = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo); + private final OrcSerde serde = new OrcSerde(); + private Writable row; + + @Override + public void reduce(IntWritable key, Iterable values, Context context) + throws IOException, InterruptedException { + List lastwords = new ArrayList(); + Map wordCounts = new HashMap(); + int count = 0; + for (Text val : values) { + String[] items = val.toString().toLowerCase().split("\\s+"); + lastwords.add(items[items.length-1]); + for (String item : items) { + if (wordCounts.containsKey(item)) { + wordCounts.put(item, wordCounts.get(item)+1); + } else { + wordCounts.put(item, 1); + } + } + count++; + } + List struct = new ArrayList(4); + struct.add(0, key.get()); + struct.add(1, count); + List> lastWordInfoList = new ArrayList>(); + for (String word : lastwords) { + List info = new ArrayList(2); + info.add(0, word); + info.add(1, word.length()); + lastWordInfoList.add(info); + } + struct.add(2, lastWordInfoList); + struct.add(3, wordCounts); + row = serde.serialize(struct, oip); + context.write(NullWritable.get(), row); + } + } + + @SuppressWarnings("unchecked") + @Test + //Test outputformat with complex data type, and with reduce + public void testNewOutputFormatComplex() throws Exception { + Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); + Path outputPath = new Path(workDir, "TestOrcFile." 
+ testCaseName.getMethodName() + ".orc"); + localFs.delete(outputPath, true); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(localFs.create(inputPath))); + pw.println("I have eaten"); + pw.println("the plums"); + pw.println("that were in"); + pw.println("the icebox"); + pw.println("and which"); + pw.println("you were probably"); + pw.println("saving"); + pw.println("for breakfast"); + pw.println("Forgive me"); + pw.println("they were delicious"); + pw.println("so sweet"); + pw.println("and so cold"); + pw.close(); + + Job job = new Job(conf, "orc test"); + job.setOutputFormatClass(OrcNewOutputFormat.class); + job.setJarByClass(TestNewInputOutputFormat.class); + job.setMapperClass(OrcTestMapper3.class); + job.setReducerClass(OrcTestReducer3.class); + job.setMapOutputKeyClass(IntWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(OrcSerdeRow.class); + FileInputFormat.addInputPath(job, inputPath); + FileOutputFormat.setOutputPath(job, outputPath); + boolean result = job.waitForCompletion(true); + assertTrue(result); + + Path outputFilePath = new Path(outputPath, "part-r-00000"); + Reader reader = OrcFile.createReader(localFs, outputFilePath); + + RecordReader rows = reader.rows(null); + ObjectInspector orcOi = reader.getObjectInspector(); + ObjectInspector stoi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(OrcTestReducer3.typeInfo); + ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(orcOi, stoi); + + Object row = rows.next(null); + List converted = (List)converter.convert(row); + assertEquals(converted.get(0), 1); + assertEquals(converted.get(1), 1); + List list = (List)converted.get(2); + assertEquals(list.size(), 1); + assertEquals(((List)list.get(0)).get(0), "saving"); + assertEquals(((List)list.get(0)).get(1), 6); + Map map = (Map)converted.get(3); + assertEquals(map.size(), 1); + assertEquals(map.get("saving"), new Integer(1)); + + row = rows.next(null); + converted = (List)converter.convert(row); + assertEquals(converted.get(0), 2); + assertEquals(converted.get(1), 6); + list = (List)converted.get(2); + assertEquals(list.size(), 6); + assertEquals(((List)list.get(0)).get(0), "plums"); + assertEquals(((List)list.get(0)).get(1), 5); + map = (Map)converted.get(3); + assertEquals(map.size(), 11); + assertEquals(map.get("the"), new Integer(2)); + + row = rows.next(null); + converted = (List)converter.convert(row); + assertEquals(converted.get(0), 3); + assertEquals(converted.get(1), 5); + list = (List)converted.get(2); + assertEquals(list.size(), 5); + assertEquals(((List)list.get(0)).get(0), "eaten"); + assertEquals(((List)list.get(0)).get(1), 5); + map = (Map)converted.get(3); + assertEquals(map.size(), 13); + assertEquals(map.get("were"), new Integer(3)); + + assertFalse(rows.hasNext()); + + localFs.delete(outputPath, true); + } + + @Test + // Test inputformat with column prune + public void testNewInputFormatPruning() throws Exception { + conf.set("hive.io.file.read.all.columns", "false"); + conf.set("hive.io.file.readcolumn.ids", "1,3"); + Job job = new Job(conf, "orc test"); + job.setInputFormatClass(OrcNewInputFormat.class); + job.setJarByClass(TestNewInputOutputFormat.class); + job.setMapperClass(OrcTestMapper1.class); + job.setNumReduceTasks(0); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(IntWritable.class); + FileInputFormat.addInputPath(job, new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"))); + Path 
+    Path outputPath = new Path(workDir,
+        "TestOrcFile." + testCaseName.getMethodName() + ".txt");
+    localFs.delete(outputPath, true);
+    FileOutputFormat.setOutputPath(job, outputPath);
+    boolean result = job.waitForCompletion(true);
+    assertTrue(result);
+    Path outputFilePath = new Path(outputPath, "part-m-00000");
+
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(localFs.open(outputFilePath)));
+    String line = reader.readLine();
+
+    assertEquals(line,
+        "{null, 1, null, 65536, null, null, null, null, null, null, null, null, null, null}");
+
+    localFs.delete(outputPath, true);
+  }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java	(revision 1540411)
+++ serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java	(working copy)
@@ -31,9 +31,9 @@
 public final class ColumnProjectionUtils {
 
   public static final String READ_COLUMN_IDS_CONF_STR = "hive.io.file.readcolumn.ids";
+  public static final String READ_ALL_COLUMNS = "hive.io.file.read.all.columns";
   public static final String READ_COLUMN_NAMES_CONF_STR = "hive.io.file.readcolumn.names";
   private static final String READ_COLUMN_IDS_CONF_STR_DEFAULT = "";
-  private static final String READ_ALL_COLUMNS = "hive.io.file.read.all.columns";
   private static final boolean READ_ALL_COLUMNS_DEFAULT = true;
 
   /**
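With READ_ALL_COLUMNS now public, external clients can request column pruning for the new input format the same way the pruning test above does with string literals; columns that are not selected come back as nulls in the OrcStruct. A minimal sketch, with the property values taken from that test and everything else illustrative:

    Configuration conf = new Configuration();
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "1,3");
    // then build the Job from this conf and use OrcNewInputFormat as shown earlier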