diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index eae0a3327e..701333d0c6 100644
--- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -63,8 +63,10 @@
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
+import org.apache.hadoop.hive.ql.io.orc.Reader;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.mapred.JobConf;
@@ -74,6 +76,7 @@
 import org.apache.hive.hcatalog.streaming.StreamingConnection;
 import org.apache.hive.hcatalog.streaming.StreamingException;
 import org.apache.hive.hcatalog.streaming.TransactionBatch;
+import org.apache.orc.OrcConf;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -1228,6 +1231,19 @@ public void testTableProperties() throws Exception {
     Assert.assertEquals("ttp1", rsp.getCompacts().get(1).getTablename());
     Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(1).getState());
 
+    /**
+     * we just did a major compaction on ttp1. Open any file produced by it and check buffer size.
+     * It should be the default.
+     */
+    List<String> rs = execSelectAndDumpData("select distinct INPUT__FILE__NAME from "
+        + tblName1, driver, "Find Orc File buffer default");
+    Assert.assertTrue("empty rs?", rs != null && rs.size() > 0);
+    Path p = new Path(rs.get(0));
+    Reader orcReader = OrcFile.createReader(p.getFileSystem(conf), p);
+    Assert.assertEquals("Expected default compression size",
+        OrcConf.BUFFER_SIZE.getDefaultValue(), orcReader.getCompressionSize());
+
+
     // Insert one more row - this should trigger hive.compactor.delta.pct.threshold to be reached for ttp2
     executeStatementOnDriver("insert into " + tblName1 + " values (6, 'f')", driver);
     executeStatementOnDriver("insert into " + tblName2 + " values (6, 'f')", driver);
@@ -1254,12 +1270,14 @@ public void testTableProperties() throws Exception {
     executeStatementOnDriver("alter table " + tblName2 + " compact 'major'" +
       " with overwrite tblproperties (" +
       "'compactor.mapreduce.map.memory.mb'='3072'," +
-      "'tblprops.orc.compress.size'='8192')", driver);
+      "'tblprops.orc.compress.size'='3141')", driver);
 
     rsp = txnHandler.showCompact(new ShowCompactRequest());
     Assert.assertEquals(4, rsp.getCompacts().size());
     Assert.assertEquals("ttp2", rsp.getCompacts().get(0).getTablename());
     Assert.assertEquals(TxnStore.INITIATED_RESPONSE, rsp.getCompacts().get(0).getState());
+    // make sure we are checking the right (latest) compaction entry
+    Assert.assertEquals(4, rsp.getCompacts().get(0).getId());
 
     // Run the Worker explicitly, in order to get the reference to the compactor MR job
     stop = new AtomicBoolean(true);
@@ -1271,7 +1289,16 @@ public void testTableProperties() throws Exception {
     t.run();
     job = t.getMrJob();
     Assert.assertEquals(3072, job.getMemoryForMapTask());
-    Assert.assertTrue(job.get("hive.compactor.table.props").contains("orc.compress.size4:8192"));
+    Assert.assertTrue(job.get("hive.compactor.table.props").contains("orc.compress.size4:3141"));
+    // we just ran major compaction, so we should have a base_x in tblName2 that has the new files.
+    // Get the name of a file and look at its properties to see if orc.compress.size was respected.
+    rs = execSelectAndDumpData("select distinct INPUT__FILE__NAME from " + tblName2,
+        driver, "Find Compacted Orc File");
+    Assert.assertTrue("empty rs?", rs != null && rs.size() > 0);
+    p = new Path(rs.get(0));
+    orcReader = OrcFile.createReader(p.getFileSystem(conf), p);
+    Assert.assertEquals("File written with wrong buffer size",
+        3141, orcReader.getCompressionSize());
   }
 
   private void writeBatch(StreamingConnection connection, DelimitedInputWriter writer,
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index 214f22afcc..0b4558d3f9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -287,7 +287,7 @@ public RecordUpdater getRecordUpdater(Path path,
   getRawRecordWriter(Path path, Options options) throws IOException {
     final Path filename = AcidUtils.createFilename(path, options);
     final OrcFile.WriterOptions opts =
-        OrcFile.writerOptions(options.getConfiguration());
+        OrcFile.writerOptions(options.getTableProperties(), options.getConfiguration());
     if (!options.isWritingBase()) {
       opts.bufferSize(OrcRecordUpdater.DELTA_BUFFER_SIZE)
           .stripeSize(OrcRecordUpdater.DELTA_STRIPE_SIZE)
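Reviewer note (not part of the patch): the OrcOutputFormat change switches getRawRecordWriter from OrcFile.writerOptions(Configuration) to the overload that also takes the table's Properties, so per-table settings such as orc.compress.size (3141 in the test) take precedence over the Configuration defaults. The sketch below illustrates that lookup order using org.apache.orc.OrcConf directly; it is a minimal standalone illustration, assuming orc-core and hadoop-common are on the classpath, and the class name BufferSizePrecedenceSketch is hypothetical.

// Illustrative only -- not part of the patch. Shows the precedence that
// OrcFile.writerOptions(tableProperties, conf) relies on: a value in the
// table's Properties (here orc.compress.size=3141, as in the test) wins
// over the Configuration / built-in default.
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.orc.OrcConf;

public class BufferSizePrecedenceSketch {   // hypothetical class name
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    Properties tblProps = new Properties();
    tblProps.setProperty(OrcConf.BUFFER_SIZE.getAttribute(), "3141"); // "orc.compress.size"

    // No table-level override: the ORC default buffer size applies (262144).
    long fromConfOnly = OrcConf.BUFFER_SIZE.getLong(conf);
    // Table properties are consulted first, so the per-table value wins.
    long fromTableProps = OrcConf.BUFFER_SIZE.getLong(tblProps, conf);

    System.out.println("conf only:   " + fromConfOnly);
    System.out.println("table props: " + fromTableProps); // 3141
  }
}

This mirrors what the test asserts: without a tblprops override the compacted file reports OrcConf.BUFFER_SIZE.getDefaultValue(), and with the override it reports 3141.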