commit 03d16bdbe1dc5b85698e3f3235b423baa7283156 Author: Mithun RK Date: Mon Sep 25 15:21:25 2017 -0700 HIVE-17600: Make OrcFile's "enforceBufferSize" user-settable. diff --git a/orc/src/java/org/apache/hive/orc/OrcConf.java b/orc/src/java/org/apache/hive/orc/OrcConf.java index dc2f86577b..90df756a71 100644 --- a/orc/src/java/org/apache/hive/orc/OrcConf.java +++ b/orc/src/java/org/apache/hive/orc/OrcConf.java @@ -51,6 +51,8 @@ "Define the version of the file to write. Possible values are 0.11 and\n"+ " 0.12. If this parameter is not defined, ORC will use the run\n" + " length encoding (RLE) introduced in Hive 0.12."), + ENFORCE_COMPRESSION_BUFFER_SIZE("orc.buffer.size.enforce", "hive.exec.orc.buffer.size.enforce", false, + "Defines whether to enforce ORC compression buffer size."), ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy", "SPEED", "Define the encoding strategy to use while writing data. Changing this\n"+ diff --git a/orc/src/java/org/apache/hive/orc/OrcFile.java b/orc/src/java/org/apache/hive/orc/OrcFile.java index 5670a6172b..5c7188f7db 100644 --- a/orc/src/java/org/apache/hive/orc/OrcFile.java +++ b/orc/src/java/org/apache/hive/orc/OrcFile.java @@ -276,6 +276,7 @@ protected WriterOptions(Properties tableProperties, Configuration conf) { compressValue = CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties, conf).toUpperCase()); + enforceBufferSize = OrcConf.ENFORCE_COMPRESSION_BUFFER_SIZE.getBoolean(tableProperties, conf); String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties, conf); versionValue = Version.byName(versionName); diff --git a/orc/src/java/org/apache/hive/orc/impl/OutStream.java b/orc/src/java/org/apache/hive/orc/impl/OutStream.java index 7157ac5840..4c46cb3c58 100644 --- a/orc/src/java/org/apache/hive/orc/impl/OutStream.java +++ b/orc/src/java/org/apache/hive/orc/impl/OutStream.java @@ -113,6 +113,20 @@ private void getNewInputBuffer() throws IOException { } /** + * Throws exception if 
the bufferSize argument equals or exceeds 2^(3*8 - 1). + * See {@link OutStream#writeHeader(ByteBuffer, int, int, boolean)}. + * The bufferSize needs to be expressible in 3 bytes, and uses the least significant bit + * to indicate original/compressed bytes. + * @param bufferSize The ORC compression buffer size being checked. + * @throws IllegalArgumentException If bufferSize value exceeds threshold. + */ + static void assertBufferSizeValid(int bufferSize) throws IllegalArgumentException { + if (bufferSize >= (1 << 23)) { + throw new IllegalArgumentException("Illegal value of ORC compression buffer size: " + bufferSize); + } + } + + /** * Allocate a new output buffer if we are compressing. */ private ByteBuffer getNewOutputBuffer() throws IOException { diff --git a/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java index 47c33bb6a5..1207b2dba2 100644 --- a/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java +++ b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java @@ -88,6 +88,7 @@ public PhysicalFsWriter(FileSystem fs, Path path, int numColumns, OrcFile.Writer this.defaultStripeSize = this.adjustedStripeSize = opts.getStripeSize(); this.addBlockPadding = opts.getBlockPadding(); if (opts.isEnforceBufferSize()) { + OutStream.assertBufferSizeValid(opts.getBufferSize()); this.bufferSize = opts.getBufferSize(); } else { this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize()); @@ -253,15 +254,15 @@ private static int getClosestBufferSize(int estBufferSize) { final int kb256 = 256 * 1024; if (estBufferSize <= kb4) { return kb4; - } else if (estBufferSize > kb4 && estBufferSize <= kb8) { + } else if (estBufferSize <= kb8) { return kb8; - } else if (estBufferSize > kb8 && estBufferSize <= kb16) { + } else if (estBufferSize <= kb16) { return kb16; - } else if (estBufferSize > kb16 && estBufferSize <= kb32) { + } else if (estBufferSize <= kb32) { 
return kb32; - } else if (estBufferSize > kb32 && estBufferSize <= kb64) { + } else if (estBufferSize <= kb64) { return kb64; - } else if (estBufferSize > kb64 && estBufferSize <= kb128) { + } else if (estBufferSize <= kb128) { return kb128; } else { return kb256; diff --git a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java index 23c13f4b21..65948c5e0f 100644 --- a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java +++ b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java @@ -25,6 +25,7 @@ import java.nio.ByteBuffer; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; public class TestOutStream { @@ -40,4 +41,17 @@ public void testFlush() throws Exception { Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class)); assertEquals(0L, stream.getBufferSize()); } + + @Test + public void testAssertBufferSizeValid() throws Exception { + try { + OutStream.assertBufferSizeValid(1 + (1<<23)); + fail("Invalid buffer-size " + (1 + (1<<23)) + " should have been blocked."); + } + catch (IllegalArgumentException expected) { + // Pass. + } + + OutStream.assertBufferSizeValid((1<<23) - 1); + } }