diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
index 6344a66..95f8cc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
@@ -118,8 +118,8 @@
*
8 bits for lower run length bits
*
*
- * Base value - encoded as varint
- * Delta base - encoded as varint
+ * Base value - zigzag encoded value written as varint
+ * Delta base - zigzag encoded value written as varint
* Delta blob - only positive values. monotonicity and orderness are decided
* based on the sign of the base value and delta base
*
@@ -472,29 +472,28 @@ private void determineEncoding() {
// invariant - subtracting any number from any other in the literals after
// this point won't overflow
+ // if min is equal to max then the delta is 0, this condition happens for
+ // fixed values run >10 which cannot be encoded with SHORT_REPEAT
+ if (min == max) {
+ assert isFixedDelta : min + "==" + max +
+ ", isFixedDelta cannot be false";
+ assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
+ fixedDelta = 0;
+ encoding = EncodingType.DELTA;
+ return;
+ }
+
+ if (isFixedDelta) {
+ assert currDelta == initialDelta
+ : "currDelta should be equal to initialDelta for fixed delta encoding";
+ encoding = EncodingType.DELTA;
+ fixedDelta = currDelta;
+ return;
+ }
+
// if initialDelta is 0 then we cannot delta encode as we cannot identify
// the sign of deltas (increasing or decreasing)
if (initialDelta != 0) {
-
- // if min is equal to max then the delta is 0, this condition happens for
- // fixed values run >10 which cannot be encoded with SHORT_REPEAT
- if (min == max) {
- assert isFixedDelta : min + "==" + max +
- ", isFixedDelta cannot be false";
- assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
- fixedDelta = 0;
- encoding = EncodingType.DELTA;
- return;
- }
-
- if (isFixedDelta) {
- assert currDelta == initialDelta
- : "currDelta should be equal to initialDelta for fixed delta encoding";
- encoding = EncodingType.DELTA;
- fixedDelta = currDelta;
- return;
- }
-
// stores the number of bits required for packing delta blob in
// delta encoding
bitsDeltaMax = utils.findClosestNumBits(deltaMax);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 255565e..6620a66 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -1895,9 +1895,9 @@ public void testMemoryManagementV12() throws Exception {
stripe.getDataLength() < 5000);
}
// with HIVE-7832, the dictionaries will be disabled after writing the first
- // stripe as there are too many distinct values. Hence only 4 stripes as
+ // stripe as there are too many distinct values. Hence only 3 stripes as
// compared to 25 stripes in version 0.11 (above test case)
- assertEquals(4, i);
+ assertEquals(3, i);
assertEquals(2500, reader.getNumberOfRows());
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
new file mode 100644
index 0000000..1a3559e
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestRLEv2 {
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target" + File.separator + "test" + File.separator + "tmp"));
+ Path testFilePath;
+ Configuration conf;
+ FileSystem fs;
+
+ @Rule
+ public TestName testCaseName = new TestName();
+
+ @Before
+ public void openFileSystem () throws Exception {
+   final String orcName = "TestRLEv2." + testCaseName.getMethodName() + ".orc";
+   this.conf = new Configuration();
+   this.fs = FileSystem.getLocal(this.conf);
+   this.testFilePath = new Path(workDir, orcName);
+   // non-recursive delete of this test method's own output file
+   this.fs.delete(this.testFilePath, false);
+ }
+
+ @Test
+ public void testFixedDeltaZero() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   for (int i = 0; i < 5120; ++i) {
+     w.addRow(123);
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
+   // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+ }
+
+ @Test
+ public void testFixedDeltaOne() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   for (int i = 0; i < 5120; ++i) {
+     w.addRow(i % 512);
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
+   // and 1 byte delta (delta = 1). In total, 4 bytes per run.
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
+ }
+
+ @Test
+ public void testFixedDeltaOneDescending() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   for (int i = 0; i < 5120; ++i) {
+     w.addRow(512 - (i % 512));
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 512,
+   // zigzag encoded varint) and 1 byte delta (delta = -1). In total, 5 bytes per run.
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+ }
+
+ @Test
+ public void testFixedDeltaLarge() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   for (int i = 0; i < 5120; ++i) {
+     w.addRow(i % 512 + ((i % 512 ) * 100));
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // 10 runs of 512 elements (values are 101 * (i % 512)). Each run has 2 bytes header, 1 byte
+   // base (base = 0) and 2 bytes delta (delta = 101, zigzag encoded varint). 5 bytes per run.
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+ }
+
+ @Test
+ public void testFixedDeltaLargeDescending() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   for (int i = 0; i < 5120; ++i) {
+     w.addRow((512 - i % 512) + ((i % 512 ) * 100));
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // values are 512 + 99 * (i % 512): ascending runs despite the test name. Each of the 10 runs
+   // has 2 bytes header, 2 bytes base (512) and 2 bytes delta (99), both zigzag varints: 6 bytes.
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
+ }
+
+ @Test
+ public void testShortRepeat() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   for (int i = 0; i < 5; ++i) {
+     w.addRow(10);
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // 5 repeats of the value 10 fit a single run: 1 byte header + 1 byte value
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
+ }
+
+ @Test
+ public void testDeltaUnknownSign() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   w.addRow(0);
+   for (int i = 0; i < 511; ++i) {
+     w.addRow(i);
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding
+   // will be used. 2 bytes for header and 640 bytes for data (512 values with a fixed bit width
+   // of 10 bits each, 5120/8 = 640). Total bytes 642
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
+ }
+
+ @Test
+ public void testPatchedBase() throws Exception {
+   ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+       Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   Writer w = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf)
+           .compress(CompressionKind.NONE)
+           .inspector(inspector)
+           .rowIndexStride(0)
+           .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+           .version(OrcFile.Version.V_0_12)
+   );
+
+   Random rand = new Random(123);
+   w.addRow(10000000);
+   for (int i = 0; i < 511; ++i) {
+     w.addRow(rand.nextInt(i+1));
+   }
+   w.close();
+
+   PrintStream origOut = System.out;
+   ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+   System.setOut(new PrintStream(myOut));
+   FileDump.main(new String[]{testFilePath.toUri().toString()});
+   System.out.flush();
+   System.setOut(origOut); // restore first: a failed assert must not leave stdout redirected
+   String outDump = new String(myOut.toByteArray());
+   // one large outlier (10000000) followed by small seeded-random values: use PATCHED_BASE encoding
+   assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
+ }
+}
diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out
index 44c60b7..2b12ddb 100644
--- a/ql/src/test/resources/orc-file-has-null.out
+++ b/ql/src/test/resources/orc-file-has-null.out
@@ -29,35 +29,35 @@ File Statistics:
Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000
Stripes:
- Stripe: offset: 3 data: 241 rows: 5000 tail: 67 index: 163
+ Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154
Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 64
- Stream: column 2 section ROW_INDEX start: 84 length 82
- Stream: column 1 section DATA start: 166 length 159
- Stream: column 1 section LENGTH start: 325 length 32
- Stream: column 2 section PRESENT start: 357 length 13
- Stream: column 2 section DATA start: 370 length 22
- Stream: column 2 section LENGTH start: 392 length 6
- Stream: column 2 section DICTIONARY_DATA start: 398 length 9
+ Stream: column 1 section ROW_INDEX start: 20 length 60
+ Stream: column 2 section ROW_INDEX start: 80 length 77
+ Stream: column 1 section DATA start: 157 length 159
+ Stream: column 1 section LENGTH start: 316 length 15
+ Stream: column 2 section PRESENT start: 331 length 13
+ Stream: column 2 section DATA start: 344 length 18
+ Stream: column 2 section LENGTH start: 362 length 6
+ Stream: column 2 section DICTIONARY_DATA start: 368 length 9
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[2]
Row group indices for column 2:
Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0
- Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,66,488
- Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488
- Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488
- Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488
- Stripe: offset: 474 data: 202 rows: 5000 tail: 64 index: 120
- Stream: column 0 section ROW_INDEX start: 474 length 17
- Stream: column 1 section ROW_INDEX start: 491 length 64
- Stream: column 2 section ROW_INDEX start: 555 length 39
- Stream: column 1 section DATA start: 594 length 159
- Stream: column 1 section LENGTH start: 753 length 32
- Stream: column 2 section PRESENT start: 785 length 11
- Stream: column 2 section DATA start: 796 length 0
- Stream: column 2 section LENGTH start: 796 length 0
- Stream: column 2 section DICTIONARY_DATA start: 796 length 0
+ Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488
+ Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488
+ Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488
+ Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488
+ Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116
+ Stream: column 0 section ROW_INDEX start: 442 length 17
+ Stream: column 1 section ROW_INDEX start: 459 length 60
+ Stream: column 2 section ROW_INDEX start: 519 length 39
+ Stream: column 1 section DATA start: 558 length 159
+ Stream: column 1 section LENGTH start: 717 length 15
+ Stream: column 2 section PRESENT start: 732 length 11
+ Stream: column 2 section DATA start: 743 length 0
+ Stream: column 2 section LENGTH start: 743 length 0
+ Stream: column 2 section DICTIONARY_DATA start: 743 length 0
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[0]
@@ -67,34 +67,34 @@ Stripes:
Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0
Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
- Stripe: offset: 860 data: 232 rows: 5000 tail: 63 index: 149
- Stream: column 0 section ROW_INDEX start: 860 length 17
- Stream: column 1 section ROW_INDEX start: 877 length 64
- Stream: column 2 section ROW_INDEX start: 941 length 68
- Stream: column 1 section DATA start: 1009 length 159
- Stream: column 1 section LENGTH start: 1168 length 32
- Stream: column 2 section DATA start: 1200 length 24
- Stream: column 2 section LENGTH start: 1224 length 6
- Stream: column 2 section DICTIONARY_DATA start: 1230 length 11
+ Stripe: offset: 807 data: 206 rows: 5000 tail: 60 index: 137
+ Stream: column 0 section ROW_INDEX start: 807 length 17
+ Stream: column 1 section ROW_INDEX start: 824 length 60
+ Stream: column 2 section ROW_INDEX start: 884 length 60
+ Stream: column 1 section DATA start: 944 length 159
+ Stream: column 1 section LENGTH start: 1103 length 15
+ Stream: column 2 section DATA start: 1118 length 15
+ Stream: column 2 section LENGTH start: 1133 length 6
+ Stream: column 2 section DICTIONARY_DATA start: 1139 length 11
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[1]
Row group indices for column 2:
Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0
- Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488
- Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464
- Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440
- Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416
- Stripe: offset: 1304 data: 202 rows: 5000 tail: 64 index: 120
- Stream: column 0 section ROW_INDEX start: 1304 length 17
- Stream: column 1 section ROW_INDEX start: 1321 length 64
- Stream: column 2 section ROW_INDEX start: 1385 length 39
- Stream: column 1 section DATA start: 1424 length 159
- Stream: column 1 section LENGTH start: 1583 length 32
- Stream: column 2 section PRESENT start: 1615 length 11
- Stream: column 2 section DATA start: 1626 length 0
- Stream: column 2 section LENGTH start: 1626 length 0
- Stream: column 2 section DICTIONARY_DATA start: 1626 length 0
+ Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488
+ Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464
+ Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440
+ Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416
+ Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116
+ Stream: column 0 section ROW_INDEX start: 1210 length 17
+ Stream: column 1 section ROW_INDEX start: 1227 length 60
+ Stream: column 2 section ROW_INDEX start: 1287 length 39
+ Stream: column 1 section DATA start: 1326 length 159
+ Stream: column 1 section LENGTH start: 1485 length 15
+ Stream: column 2 section PRESENT start: 1500 length 11
+ Stream: column 2 section DATA start: 1511 length 0
+ Stream: column 2 section LENGTH start: 1511 length 0
+ Stream: column 2 section DICTIONARY_DATA start: 1511 length 0
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[0]
@@ -105,6 +105,6 @@ Stripes:
Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
-File length: 1940 bytes
+File length: 1823 bytes
Padding length: 0 bytes
Padding ratio: 0%
diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
index 67aa189..7503c81 100644
--- a/ql/src/test/results/clientpositive/orc_file_dump.q.out
+++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
@@ -129,7 +129,7 @@ File Statistics:
Column 11: count: 1049 hasNull: false sum: 13278
Stripes:
- Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
+ Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
Stream: column 0 section ROW_INDEX start: 3 length 20
Stream: column 0 section BLOOM_FILTER start: 23 length 45
Stream: column 1 section ROW_INDEX start: 68 length 58
@@ -148,30 +148,30 @@ Stripes:
Stream: column 7 section BLOOM_FILTER start: 6812 length 45
Stream: column 8 section ROW_INDEX start: 6857 length 86
Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
- Stream: column 9 section ROW_INDEX start: 8100 length 51
- Stream: column 9 section BLOOM_FILTER start: 8151 length 62
- Stream: column 10 section ROW_INDEX start: 8213 length 82
- Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
- Stream: column 11 section ROW_INDEX start: 9592 length 47
- Stream: column 11 section BLOOM_FILTER start: 9639 length 308
- Stream: column 1 section PRESENT start: 9947 length 17
- Stream: column 1 section DATA start: 9964 length 962
- Stream: column 2 section PRESENT start: 10926 length 17
- Stream: column 2 section DATA start: 10943 length 1441
- Stream: column 3 section DATA start: 12384 length 1704
- Stream: column 4 section DATA start: 14088 length 1998
- Stream: column 5 section DATA start: 16086 length 2925
- Stream: column 6 section DATA start: 19011 length 3323
- Stream: column 7 section DATA start: 22334 length 137
- Stream: column 8 section DATA start: 22471 length 1572
- Stream: column 8 section LENGTH start: 24043 length 310
- Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
- Stream: column 9 section DATA start: 25901 length 62
- Stream: column 9 section SECONDARY start: 25963 length 1783
- Stream: column 10 section DATA start: 27746 length 2138
- Stream: column 10 section SECONDARY start: 29884 length 231
- Stream: column 11 section DATA start: 30115 length 1877
- Stream: column 11 section LENGTH start: 31992 length 591
+ Stream: column 9 section ROW_INDEX start: 8100 length 50
+ Stream: column 9 section BLOOM_FILTER start: 8150 length 62
+ Stream: column 10 section ROW_INDEX start: 8212 length 82
+ Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
+ Stream: column 11 section ROW_INDEX start: 9591 length 47
+ Stream: column 11 section BLOOM_FILTER start: 9638 length 308
+ Stream: column 1 section PRESENT start: 9946 length 17
+ Stream: column 1 section DATA start: 9963 length 962
+ Stream: column 2 section PRESENT start: 10925 length 17
+ Stream: column 2 section DATA start: 10942 length 1441
+ Stream: column 3 section DATA start: 12383 length 1704
+ Stream: column 4 section DATA start: 14087 length 1998
+ Stream: column 5 section DATA start: 16085 length 2925
+ Stream: column 6 section DATA start: 19010 length 3323
+ Stream: column 7 section DATA start: 22333 length 137
+ Stream: column 8 section DATA start: 22470 length 1572
+ Stream: column 8 section LENGTH start: 24042 length 310
+ Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
+ Stream: column 9 section DATA start: 25900 length 19
+ Stream: column 9 section SECONDARY start: 25919 length 1783
+ Stream: column 10 section DATA start: 27702 length 2138
+ Stream: column 10 section SECONDARY start: 29840 length 231
+ Stream: column 11 section DATA start: 30071 length 1877
+ Stream: column 11 section LENGTH start: 31948 length 591
Encoding column 0: DIRECT
Encoding column 1: DIRECT
Encoding column 2: DIRECT_V2
@@ -192,7 +192,7 @@ Stripes:
Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5
-File length: 33456 bytes
+File length: 33413 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -247,7 +247,7 @@ File Statistics:
Column 11: count: 1049 hasNull: false sum: 13278
Stripes:
- Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096
+ Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 15095
Stream: column 0 section ROW_INDEX start: 3 length 20
Stream: column 0 section BLOOM_FILTER start: 23 length 56
Stream: column 1 section ROW_INDEX start: 79 length 58
@@ -266,30 +266,30 @@ Stripes:
Stream: column 7 section BLOOM_FILTER start: 10385 length 56
Stream: column 8 section ROW_INDEX start: 10441 length 86
Stream: column 8 section BLOOM_FILTER start: 10527 length 1829
- Stream: column 9 section ROW_INDEX start: 12356 length 51
- Stream: column 9 section BLOOM_FILTER start: 12407 length 95
- Stream: column 10 section ROW_INDEX start: 12502 length 82
- Stream: column 10 section BLOOM_FILTER start: 12584 length 1994
- Stream: column 11 section ROW_INDEX start: 14578 length 47
- Stream: column 11 section BLOOM_FILTER start: 14625 length 474
- Stream: column 1 section PRESENT start: 15099 length 17
- Stream: column 1 section DATA start: 15116 length 962
- Stream: column 2 section PRESENT start: 16078 length 17
- Stream: column 2 section DATA start: 16095 length 1441
- Stream: column 3 section DATA start: 17536 length 1704
- Stream: column 4 section DATA start: 19240 length 1998
- Stream: column 5 section DATA start: 21238 length 2925
- Stream: column 6 section DATA start: 24163 length 3323
- Stream: column 7 section DATA start: 27486 length 137
- Stream: column 8 section DATA start: 27623 length 1572
- Stream: column 8 section LENGTH start: 29195 length 310
- Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548
- Stream: column 9 section DATA start: 31053 length 62
- Stream: column 9 section SECONDARY start: 31115 length 1783
- Stream: column 10 section DATA start: 32898 length 2138
- Stream: column 10 section SECONDARY start: 35036 length 231
- Stream: column 11 section DATA start: 35267 length 1877
- Stream: column 11 section LENGTH start: 37144 length 591
+ Stream: column 9 section ROW_INDEX start: 12356 length 50
+ Stream: column 9 section BLOOM_FILTER start: 12406 length 95
+ Stream: column 10 section ROW_INDEX start: 12501 length 82
+ Stream: column 10 section BLOOM_FILTER start: 12583 length 1994
+ Stream: column 11 section ROW_INDEX start: 14577 length 47
+ Stream: column 11 section BLOOM_FILTER start: 14624 length 474
+ Stream: column 1 section PRESENT start: 15098 length 17
+ Stream: column 1 section DATA start: 15115 length 962
+ Stream: column 2 section PRESENT start: 16077 length 17
+ Stream: column 2 section DATA start: 16094 length 1441
+ Stream: column 3 section DATA start: 17535 length 1704
+ Stream: column 4 section DATA start: 19239 length 1998
+ Stream: column 5 section DATA start: 21237 length 2925
+ Stream: column 6 section DATA start: 24162 length 3323
+ Stream: column 7 section DATA start: 27485 length 137
+ Stream: column 8 section DATA start: 27622 length 1572
+ Stream: column 8 section LENGTH start: 29194 length 310
+ Stream: column 8 section DICTIONARY_DATA start: 29504 length 1548
+ Stream: column 9 section DATA start: 31052 length 19
+ Stream: column 9 section SECONDARY start: 31071 length 1783
+ Stream: column 10 section DATA start: 32854 length 2138
+ Stream: column 10 section SECONDARY start: 34992 length 231
+ Stream: column 11 section DATA start: 35223 length 1877
+ Stream: column 11 section LENGTH start: 37100 length 591
Encoding column 0: DIRECT
Encoding column 1: DIRECT
Encoding column 2: DIRECT_V2
@@ -310,7 +310,7 @@ Stripes:
Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8
-File length: 38610 bytes
+File length: 38565 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -377,7 +377,7 @@ File Statistics:
Column 11: count: 1049 hasNull: false sum: 13278
Stripes:
- Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
+ Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
Stream: column 0 section ROW_INDEX start: 3 length 20
Stream: column 0 section BLOOM_FILTER start: 23 length 45
Stream: column 1 section ROW_INDEX start: 68 length 58
@@ -396,30 +396,30 @@ Stripes:
Stream: column 7 section BLOOM_FILTER start: 6812 length 45
Stream: column 8 section ROW_INDEX start: 6857 length 86
Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
- Stream: column 9 section ROW_INDEX start: 8100 length 51
- Stream: column 9 section BLOOM_FILTER start: 8151 length 62
- Stream: column 10 section ROW_INDEX start: 8213 length 82
- Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
- Stream: column 11 section ROW_INDEX start: 9592 length 47
- Stream: column 11 section BLOOM_FILTER start: 9639 length 308
- Stream: column 1 section PRESENT start: 9947 length 17
- Stream: column 1 section DATA start: 9964 length 962
- Stream: column 2 section PRESENT start: 10926 length 17
- Stream: column 2 section DATA start: 10943 length 1441
- Stream: column 3 section DATA start: 12384 length 1704
- Stream: column 4 section DATA start: 14088 length 1998
- Stream: column 5 section DATA start: 16086 length 2925
- Stream: column 6 section DATA start: 19011 length 3323
- Stream: column 7 section DATA start: 22334 length 137
- Stream: column 8 section DATA start: 22471 length 1572
- Stream: column 8 section LENGTH start: 24043 length 310
- Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
- Stream: column 9 section DATA start: 25901 length 62
- Stream: column 9 section SECONDARY start: 25963 length 1783
- Stream: column 10 section DATA start: 27746 length 2138
- Stream: column 10 section SECONDARY start: 29884 length 231
- Stream: column 11 section DATA start: 30115 length 1877
- Stream: column 11 section LENGTH start: 31992 length 591
+ Stream: column 9 section ROW_INDEX start: 8100 length 50
+ Stream: column 9 section BLOOM_FILTER start: 8150 length 62
+ Stream: column 10 section ROW_INDEX start: 8212 length 82
+ Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
+ Stream: column 11 section ROW_INDEX start: 9591 length 47
+ Stream: column 11 section BLOOM_FILTER start: 9638 length 308
+ Stream: column 1 section PRESENT start: 9946 length 17
+ Stream: column 1 section DATA start: 9963 length 962
+ Stream: column 2 section PRESENT start: 10925 length 17
+ Stream: column 2 section DATA start: 10942 length 1441
+ Stream: column 3 section DATA start: 12383 length 1704
+ Stream: column 4 section DATA start: 14087 length 1998
+ Stream: column 5 section DATA start: 16085 length 2925
+ Stream: column 6 section DATA start: 19010 length 3323
+ Stream: column 7 section DATA start: 22333 length 137
+ Stream: column 8 section DATA start: 22470 length 1572
+ Stream: column 8 section LENGTH start: 24042 length 310
+ Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
+ Stream: column 9 section DATA start: 25900 length 19
+ Stream: column 9 section SECONDARY start: 25919 length 1783
+ Stream: column 10 section DATA start: 27702 length 2138
+ Stream: column 10 section SECONDARY start: 29840 length 231
+ Stream: column 11 section DATA start: 30071 length 1877
+ Stream: column 11 section LENGTH start: 31948 length 591
Encoding column 0: DIRECT
Encoding column 1: DIRECT
Encoding column 2: DIRECT_V2
@@ -440,7 +440,7 @@ Stripes:
Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5
-File length: 33456 bytes
+File length: 33413 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out
index da608db..1c4eb0a 100644
--- a/ql/src/test/results/clientpositive/orc_merge11.q.out
+++ b/ql/src/test/results/clientpositive/orc_merge11.q.out
@@ -72,11 +72,11 @@ PREHOOK: Input: default@orcfile_merge1
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_8732
Rows: 50000
Compression: ZLIB
Compression size: 4096
-Type: struct
+Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>
Stripe Statistics:
Stripe 1:
@@ -96,22 +96,22 @@ File Statistics:
Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0
Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -120,22 +120,22 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
-File length: 11071 bytes
+File length: 6828 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_8732
Rows: 50000
Compression: ZLIB
Compression size: 4096
-Type: struct
+Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>
Stripe Statistics:
Stripe 1:
@@ -155,22 +155,22 @@ File Statistics:
Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0
Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -179,12 +179,12 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
-File length: 11071 bytes
+File length: 6828 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -213,11 +213,11 @@ PREHOOK: Input: default@orcfile_merge1
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_8732
Rows: 100000
Compression: ZLIB
Compression size: 4096
-Type: struct
+Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>
Stripe Statistics:
Stripe 1:
@@ -244,22 +244,22 @@ File Statistics:
Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0
Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -268,26 +268,26 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
- Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
- Stream: column 0 section ROW_INDEX start: 10733 length 17
- Stream: column 1 section ROW_INDEX start: 10750 length 85
- Stream: column 2 section ROW_INDEX start: 10835 length 87
- Stream: column 3 section ROW_INDEX start: 10922 length 111
- Stream: column 4 section ROW_INDEX start: 11033 length 108
- Stream: column 5 section ROW_INDEX start: 11141 length 101
- Stream: column 1 section DATA start: 11242 length 871
- Stream: column 2 section DATA start: 12113 length 362
- Stream: column 2 section LENGTH start: 12475 length 8
- Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
- Stream: column 3 section DATA start: 12506 length 5167
- Stream: column 4 section DATA start: 17673 length 524
- Stream: column 4 section SECONDARY start: 18197 length 118
- Stream: column 5 section DATA start: 18315 length 2913
- Stream: column 5 section SECONDARY start: 21228 length 118
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
+ Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
+ Stream: column 0 section ROW_INDEX start: 6511 length 17
+ Stream: column 1 section ROW_INDEX start: 6528 length 83
+ Stream: column 2 section ROW_INDEX start: 6611 length 81
+ Stream: column 3 section ROW_INDEX start: 6692 length 111
+ Stream: column 4 section ROW_INDEX start: 6803 length 110
+ Stream: column 5 section ROW_INDEX start: 6913 length 96
+ Stream: column 1 section DATA start: 7009 length 45
+ Stream: column 2 section DATA start: 7054 length 41
+ Stream: column 2 section LENGTH start: 7095 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
+ Stream: column 3 section DATA start: 7126 length 5167
+ Stream: column 4 section DATA start: 12293 length 524
+ Stream: column 4 section SECONDARY start: 12817 length 18
+ Stream: column 5 section DATA start: 12835 length 53
+ Stream: column 5 section SECONDARY start: 12888 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -296,12 +296,12 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
-File length: 21814 bytes
+File length: 13348 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
index da608db..1c4eb0a 100644
--- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
@@ -72,11 +72,11 @@ PREHOOK: Input: default@orcfile_merge1
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_8732
Rows: 50000
Compression: ZLIB
Compression size: 4096
-Type: struct
+Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>
Stripe Statistics:
Stripe 1:
@@ -96,22 +96,22 @@ File Statistics:
Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0
Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -120,22 +120,22 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
-File length: 11071 bytes
+File length: 6828 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_8732
Rows: 50000
Compression: ZLIB
Compression size: 4096
-Type: struct
+Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>
Stripe Statistics:
Stripe 1:
@@ -155,22 +155,22 @@ File Statistics:
Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0
Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -179,12 +179,12 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
-File length: 11071 bytes
+File length: 6828 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -213,11 +213,11 @@ PREHOOK: Input: default@orcfile_merge1
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_8732
Rows: 100000
Compression: ZLIB
Compression size: 4096
-Type: struct
+Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>
Stripe Statistics:
Stripe 1:
@@ -244,22 +244,22 @@ File Statistics:
Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0
Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -268,26 +268,26 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
- Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
- Stream: column 0 section ROW_INDEX start: 10733 length 17
- Stream: column 1 section ROW_INDEX start: 10750 length 85
- Stream: column 2 section ROW_INDEX start: 10835 length 87
- Stream: column 3 section ROW_INDEX start: 10922 length 111
- Stream: column 4 section ROW_INDEX start: 11033 length 108
- Stream: column 5 section ROW_INDEX start: 11141 length 101
- Stream: column 1 section DATA start: 11242 length 871
- Stream: column 2 section DATA start: 12113 length 362
- Stream: column 2 section LENGTH start: 12475 length 8
- Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
- Stream: column 3 section DATA start: 12506 length 5167
- Stream: column 4 section DATA start: 17673 length 524
- Stream: column 4 section SECONDARY start: 18197 length 118
- Stream: column 5 section DATA start: 18315 length 2913
- Stream: column 5 section SECONDARY start: 21228 length 118
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
+ Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
+ Stream: column 0 section ROW_INDEX start: 6511 length 17
+ Stream: column 1 section ROW_INDEX start: 6528 length 83
+ Stream: column 2 section ROW_INDEX start: 6611 length 81
+ Stream: column 3 section ROW_INDEX start: 6692 length 111
+ Stream: column 4 section ROW_INDEX start: 6803 length 110
+ Stream: column 5 section ROW_INDEX start: 6913 length 96
+ Stream: column 1 section DATA start: 7009 length 45
+ Stream: column 2 section DATA start: 7054 length 41
+ Stream: column 2 section LENGTH start: 7095 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
+ Stream: column 3 section DATA start: 7126 length 5167
+ Stream: column 4 section DATA start: 12293 length 524
+ Stream: column 4 section SECONDARY start: 12817 length 18
+ Stream: column 5 section DATA start: 12835 length 53
+ Stream: column 5 section SECONDARY start: 12888 length 18
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DICTIONARY_V2[6]
@@ -296,12 +296,12 @@ Stripes:
Encoding column 5: DIRECT_V2
Row group indices for column 1:
Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
-File length: 21814 bytes
+File length: 13348 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --