diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e0c8e82bcc..9b44e309f9 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2174,6 +2174,10 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
     HIVEOPTLISTBUCKETING("hive.optimize.listbucketing", false,
         "Enable list bucketing optimizer. Default value is false so that we disable it by default."),
 
+    ORC_ENFORCE_COMPRESSION_BUFFER_SIZE("hive.exec.orc.buffer.size.enforce", false,
+        "Defines whether to enforce ORC compression buffer size."),
+
+
     // Allow TCP Keep alive socket option for for HiveServer or a maximum timeout for the socket.
     SERVER_READ_SOCKET_TIMEOUT("hive.server.read.socket.timeout", "10s",
         new TimeValidator(TimeUnit.SECONDS),
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index dc00e38fe7..c83b8c16ac 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -33,6 +33,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
@@ -161,7 +162,8 @@ private WriterVersion(int id) {
     BLOCK_PADDING("orc.block.padding"),
     ENCODING_STRATEGY("orc.encoding.strategy"),
     BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns"),
-    BLOOM_FILTER_FPP("orc.bloom.filter.fpp");
+    BLOOM_FILTER_FPP("orc.bloom.filter.fpp"),
+    ENFORCE_BUFFER_SIZE("orc.buffer.size.enforce");
 
     private final String propName;
 
@@ -314,6 +316,12 @@ public static Reader createReader(Path path,
           : CompressionKind.valueOf(propValue.toUpperCase());
 
       propValue = tableProperties == null ? null
+          : tableProperties.getProperty(OrcTableProperties.ENFORCE_BUFFER_SIZE.propName);
+      enforceBufferSize = propValue == null ? HiveConf.getBoolVar(conf, ConfVars.ORC_ENFORCE_COMPRESSION_BUFFER_SIZE)
+          : Boolean.parseBoolean(propValue);
+
+
+      propValue = tableProperties == null ? null
           : tableProperties.getProperty(OrcTableProperties.BLOOM_FILTER_COLUMNS.propName);
       bloomFilterColumns = propValue;
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
index e2096eb839..32992e99c6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
@@ -282,5 +282,20 @@ public void suppress() {
 
   public boolean isSuppressed() {
     return suppress;
   }
+
+  /**
+   * Throws an exception if the bufferSize argument equals or exceeds 2^(3*8 - 1).
+   * See {@link OutStream#writeHeader(ByteBuffer, int, int, boolean)}.
+   * The bufferSize needs to be expressible in 3 bytes, and the least significant bit
+   * is used to indicate original/compressed bytes.
+   * @param bufferSize The ORC compression buffer size being checked.
+   * @throws IllegalArgumentException If the bufferSize value equals or exceeds the threshold.
+   */
+  static void assertBufferSizeValid(int bufferSize) throws IllegalArgumentException {
+    if (bufferSize >= (1 << 23)) {
+      throw new IllegalArgumentException("Illegal value of ORC compression buffer size: " + bufferSize);
+    }
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 3929d7be64..fc7c39723b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -210,6 +210,7 @@ public Writer getWriter() {
       allColumns = getColumnNamesFromInspector(inspector);
     }
     if (enforceBufferSize) {
+      OutStream.assertBufferSizeValid(bufferSize);
       this.bufferSize = bufferSize;
     } else {
       this.bufferSize = getEstimatedBufferSize(defaultStripeSize,
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOutStream.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOutStream.java
index 58c1bfb38d..a23e246eb0 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOutStream.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOutStream.java
@@ -24,6 +24,7 @@
 import java.nio.ByteBuffer;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
 
 public class TestOutStream {
   @Test
@@ -38,4 +39,19 @@ public void testFlush() throws Exception {
     Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class));
     assertEquals(0L, stream.getBufferSize());
   }
+
+  @Test
+  public void testAssertBufferSizeValid() throws Exception {
+    try {
+      OutStream.assertBufferSizeValid(1 + (1 << 23));
+      fail("Invalid buffer-size " + (1 + (1 << 23))
+          + " should have been blocked.");
+    } catch (IllegalArgumentException expected) {
+      // Pass.
+    }
+
+    OutStream.assertBufferSizeValid((1 << 23) - 1);
+  }
+
+
 }
\ No newline at end of file
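
Reviewer note (not part of the patch): the (1 << 23) bound in assertBufferSizeValid() falls out of the 3-byte chunk header that its Javadoc describes: 23 bits carry the chunk length and the least significant bit carries the original/compressed flag. The self-contained demo below sketches that arithmetic; the class name and the exact byte layout are assumptions inferred from the Javadoc and the limit, not code copied from OutStream#writeHeader.

import java.nio.ByteBuffer;

/** Hypothetical standalone sketch; the class name and header layout are assumptions. */
public class OrcBufferSizeLimitDemo {

  // Assumed layout: pack (length << 1) | isOriginal into three bytes,
  // least significant byte first, leaving only 23 bits for the length.
  static void writeHeader(ByteBuffer buf, int pos, int length, boolean original) {
    buf.put(pos, (byte) ((length << 1) + (original ? 1 : 0)));
    buf.put(pos + 1, (byte) (length >> 7));
    buf.put(pos + 2, (byte) (length >> 15));
  }

  // Recover the chunk length from the three header bytes.
  static int readLength(ByteBuffer buf, int pos) {
    return ((buf.get(pos) & 0xff)
        | (buf.get(pos + 1) & 0xff) << 8
        | (buf.get(pos + 2) & 0xff) << 16) >>> 1;
  }

  public static void main(String[] args) {
    ByteBuffer buf = ByteBuffer.allocate(3);

    int largestValid = (1 << 23) - 1;        // 8388607 survives the round trip
    writeHeader(buf, 0, largestValid, false);
    System.out.println(readLength(buf, 0));  // prints 8388607

    int oneTooBig = 1 << 23;                 // 8388608: the 24th bit falls off
    writeHeader(buf, 0, oneTooBig, false);
    System.out.println(readLength(buf, 0));  // prints 0 -- silent truncation
  }
}

Under those assumptions, any length of 2^23 or more loses its high bits in the header, which is why WriterImpl now calls OutStream.assertBufferSizeValid(bufferSize) before honoring an enforced buffer size instead of writing headers that would be silently corrupted.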