commit 89dd722fcc0383e7fa7d14968051ff23b24aead0
Author: Owen O'Malley
Date:   Mon Mar 31 23:43:42 2014 +0200

    BUG-15793. Fix bucketed non-acid tables with vectorization.

diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 0ccc3ad..81c630a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -536,10 +536,12 @@ public void run() {
         // Generate a split for any buckets that weren't covered.
         // This happens in the case where a bucket just has deltas and no
         // base.
-        for(int b=0; b < context.numBuckets; ++b) {
-          if (!covered[b]) {
-            context.splits.add(new OrcSplit(dir, b, 0, new String[0], null,
-                false, false, deltas));
+        if (!deltas.isEmpty()) {
+          for (int b = 0; b < context.numBuckets; ++b) {
+            if (!covered[b]) {
+              context.splits.add(new OrcSplit(dir, b, 0, new String[0], null,
+                  false, false, deltas));
+            }
           }
         }
       } catch (Throwable th) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 04065ff..6f0e328 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -175,9 +175,11 @@ static ObjectInspector createEventSchema(ObjectInspector rowInspector) {
       FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
       strm.writeInt(ORC_ACID_VERSION);
       strm.close();
-      LOG.info("Created " + path + "/" + ACID_FORMAT);
     } catch (IOException ioe) {
-      // we just need one task to write this file
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
+            ioe);
+      }
     }
     if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
         && !options.isWritingBase()){
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index f0e1e39..42a4bf7 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -48,6 +48,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -1078,6 +1079,54 @@ public void testVectorization() throws Exception {
     assertEquals(false, reader.next(key, value));
   }
 
+  /**
+   * Test vectorization, non-acid, non-combine.
+   * @throws Exception
+   */
+  @Test
+  public void testVectorizationWithBuckets() throws Exception {
+    // get the object inspector for MyRow
+    StructObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = (StructObjectInspector)
+          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
+        inspector, true);
+
+    // write the orc file to the mock file system
+    Writer writer =
+        OrcFile.createWriter(new Path(conf.get("mapred.input.dir") + "/0_0"),
+            OrcFile.writerOptions(conf).blockPadding(false)
+                .bufferSize(1024).inspector(inspector));
+    for(int i=0; i < 10; ++i) {
+      writer.addRow(new MyRow(i, 2*i));
+    }
+    writer.close();
+    ((MockOutputStream) ((WriterImpl) writer).getStream())
+        .setBlocks(new MockBlock("host0", "host1"));
+
+    // call getsplits
+    conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3);
+    HiveInputFormat<?,?> inputFormat =
+        new HiveInputFormat<WritableComparable, Writable>();
+    InputSplit[] splits = inputFormat.getSplits(conf, 10);
+    assertEquals(1, splits.length);
+
+    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
+        reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
+    NullWritable key = reader.createKey();
+    VectorizedRowBatch value = reader.createValue();
+    assertEquals(true, reader.next(key, value));
+    assertEquals(10, value.count());
+    LongColumnVector col0 = (LongColumnVector) value.cols[0];
+    for(int i=0; i < 10; i++) {
+      assertEquals("checking " + i, i, col0.vector[i]);
+    }
+    assertEquals(false, reader.next(key, value));
+  }
+
   // test acid with vectorization, no combine
   @Test
   public void testVectorizationWithAcid() throws Exception {
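
For context, here is a standalone sketch, not part of the patch, that models the split-generation change in OrcInputFormat above. The Split class and generateSplits method are hypothetical stand-ins for OrcSplit and the real uncovered-bucket loop. It illustrates the apparent failure mode: a bucketed non-acid table whose declared bucket count exceeds the number of files on disk used to get synthetic, fileless splits for the missing buckets, and the new !deltas.isEmpty() guard restricts those synthetic splits to the acid case, where delta directories may actually hold rows for them.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class UncoveredBucketSketch {

  // Hypothetical stand-in for OrcSplit: just records the bucket id.
  static class Split {
    final int bucket;
    Split(int bucket) { this.bucket = bucket; }
  }

  static List<Split> generateSplits(int numBuckets, boolean[] covered,
                                    List<String> deltas, boolean fixed) {
    List<Split> splits = new ArrayList<Split>();
    // After the fix, synthetic splits for uncovered buckets are only
    // emitted when delta directories exist that might hold rows for them.
    if (!fixed || !deltas.isEmpty()) {
      for (int b = 0; b < numBuckets; ++b) {
        if (!covered[b]) {
          splits.add(new Split(b));
        }
      }
    }
    return splits;
  }

  public static void main(String[] args) {
    // Non-acid bucketed table: 3 declared buckets, but only bucket 0 has a
    // data file on disk, and there are no acid deltas.
    boolean[] covered = {true, false, false};
    List<String> noDeltas = Collections.emptyList();

    // Old behavior: two synthetic, fileless splits for buckets 1 and 2.
    System.out.println(generateSplits(3, covered, noDeltas, false).size()); // 2
    // Fixed behavior: no synthetic splits, so the vectorized reader only
    // sees splits backed by real ORC files, matching the new test's
    // assertEquals(1, splits.length).
    System.out.println(generateSplits(3, covered, noDeltas, true).size());  // 0
  }
}

The OrcRecordUpdater change is independent of this sketch: fs.create(path, false) is expected to fail for every task except the first one to write the ACID_FORMAT marker, since only one writer is needed, so the commit drops the info-level "Created" message on success and logs the expected IOException at debug level instead of swallowing it silently.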