diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java index 6344a66..95f8cc8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java @@ -118,8 +118,8 @@ *
  • 8 bits for lower run length bits
  • * * - *
  • Base value - encoded as varint
  • - *
  • Delta base - encoded as varint
  • + *
  • Base value - zigzag encoded value written as varint
  • + *
  • Delta base - zigzag encoded value written as varint
  • *
  • Delta blob - only positive values. monotonicity and orderness are decided * based on the sign of the base value and delta base
  • * @@ -472,29 +472,28 @@ private void determineEncoding() { // invariant - subtracting any number from any other in the literals after // this point won't overflow + // if min is equal to max then the delta is 0, this condition happens for + // fixed values run >10 which cannot be encoded with SHORT_REPEAT + if (min == max) { + assert isFixedDelta : min + "==" + max + + ", isFixedDelta cannot be false"; + assert currDelta == 0 : min + "==" + max + ", currDelta should be zero"; + fixedDelta = 0; + encoding = EncodingType.DELTA; + return; + } + + if (isFixedDelta) { + assert currDelta == initialDelta + : "currDelta should be equal to initialDelta for fixed delta encoding"; + encoding = EncodingType.DELTA; + fixedDelta = currDelta; + return; + } + // if initialDelta is 0 then we cannot delta encode as we cannot identify // the sign of deltas (increasing or decreasing) if (initialDelta != 0) { - - // if min is equal to max then the delta is 0, this condition happens for - // fixed values run >10 which cannot be encoded with SHORT_REPEAT - if (min == max) { - assert isFixedDelta : min + "==" + max + - ", isFixedDelta cannot be false"; - assert currDelta == 0 : min + "==" + max + ", currDelta should be zero"; - fixedDelta = 0; - encoding = EncodingType.DELTA; - return; - } - - if (isFixedDelta) { - assert currDelta == initialDelta - : "currDelta should be equal to initialDelta for fixed delta encoding"; - encoding = EncodingType.DELTA; - fixedDelta = currDelta; - return; - } - // stores the number of bits required for packing delta blob in // delta encoding bitsDeltaMax = utils.findClosestNumBits(deltaMax); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 255565e..6620a66 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -1895,9 +1895,9 @@ public void testMemoryManagementV12() throws Exception { stripe.getDataLength() < 5000); } // with HIVE-7832, the dictionaries will be disabled after writing the first - // stripe as there are too many distinct values. Hence only 4 stripes as + // stripe as there are too many distinct values. Hence only 3 stripes as // compared to 25 stripes in version 0.11 (above test case) - assertEquals(4, i); + assertEquals(3, i); assertEquals(2500, reader.getNumberOfRows()); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java new file mode 100644 index 0000000..1a3559e --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +public class TestRLEv2 { + Path workDir = new Path(System.getProperty("test.tmp.dir", + "target" + File.separator + "test" + File.separator + "tmp")); + Path testFilePath; + Configuration conf; + FileSystem fs; + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem () throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestRLEv2." + + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + @Test + public void testFixedDeltaZero() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(123); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123, + // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaOne() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(i % 512); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0) + // and 1 byte delta (delta = 1). In total, 4 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaOneDescending() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(512 - (i % 512)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint) + // and 1 byte delta (delta = 1). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaLarge() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow(i % 512 + ((i % 512 ) * 100)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0) + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaLargeDescending() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5120; ++i) { + w.addRow((512 - i % 512) + ((i % 512 ) * 100)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint) + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60")); + System.setOut(origOut); + } + + @Test + public void testShortRepeat() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + for (int i = 0; i < 5; ++i) { + w.addRow(10); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 1 byte header + 1 byte value + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2")); + System.setOut(origOut); + } + + @Test + public void testDeltaUnknownSign() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + w.addRow(0); + for (int i = 0; i < 511; ++i) { + w.addRow(i); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding + // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits + // each, 5120/8 = 640). Total bytes 642 + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642")); + System.setOut(origOut); + } + + @Test + public void testPatchedBase() throws Exception { + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector( + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .inspector(inspector) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + Random rand = new Random(123); + w.addRow(10000000); + for (int i = 0; i < 511; ++i) { + w.addRow(rand.nextInt(i+1)); + } + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // use PATCHED_BASE encoding + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583")); + System.setOut(origOut); + } +} diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out index 44c60b7..2b12ddb 100644 --- a/ql/src/test/resources/orc-file-has-null.out +++ b/ql/src/test/resources/orc-file-has-null.out @@ -29,35 +29,35 @@ File Statistics: Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000 Stripes: - Stripe: offset: 3 data: 241 rows: 5000 tail: 67 index: 163 + Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 64 - Stream: column 2 section ROW_INDEX start: 84 length 82 - Stream: column 1 section DATA start: 166 length 159 - Stream: column 1 section LENGTH start: 325 length 32 - Stream: column 2 section PRESENT start: 357 length 13 - Stream: column 2 section DATA start: 370 length 22 - Stream: column 2 section LENGTH start: 392 length 6 - Stream: column 2 section DICTIONARY_DATA start: 398 length 9 + Stream: column 1 section ROW_INDEX start: 20 length 60 + Stream: column 2 section ROW_INDEX start: 80 length 77 + Stream: column 1 section DATA start: 157 length 159 + Stream: column 1 section LENGTH start: 316 length 15 + Stream: column 2 section PRESENT start: 331 length 13 + Stream: column 2 section DATA start: 344 length 18 + Stream: column 2 section LENGTH start: 362 length 6 + Stream: column 2 section DICTIONARY_DATA start: 368 length 9 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[2] Row group indices for column 2: Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,66,488 - Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488 - Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488 - Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488 - Stripe: offset: 474 data: 202 rows: 5000 tail: 64 index: 120 - Stream: column 0 section ROW_INDEX start: 474 length 17 - Stream: column 1 section ROW_INDEX start: 491 length 64 - Stream: column 2 section ROW_INDEX start: 555 length 39 - Stream: column 1 section DATA start: 594 length 159 - Stream: column 1 section LENGTH start: 753 length 32 - Stream: column 2 section PRESENT start: 785 length 11 - Stream: column 2 section DATA start: 796 length 0 - Stream: column 2 section LENGTH start: 796 length 0 - Stream: column 2 section DICTIONARY_DATA start: 796 length 0 + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488 + Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488 + Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488 + Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488 + Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116 + Stream: column 0 section ROW_INDEX start: 442 length 17 + Stream: column 1 section ROW_INDEX start: 459 length 60 + Stream: column 2 section ROW_INDEX start: 519 length 39 + Stream: column 1 section DATA start: 558 length 159 + Stream: column 1 section LENGTH start: 717 length 15 + Stream: column 2 section PRESENT start: 732 length 11 + Stream: column 2 section DATA start: 743 length 0 + Stream: column 2 section LENGTH start: 743 length 0 + Stream: column 2 section DICTIONARY_DATA start: 743 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] @@ -67,34 +67,34 @@ Stripes: Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 - Stripe: offset: 860 data: 232 rows: 5000 tail: 63 index: 149 - Stream: column 0 section ROW_INDEX start: 860 length 17 - Stream: column 1 section ROW_INDEX start: 877 length 64 - Stream: column 2 section ROW_INDEX start: 941 length 68 - Stream: column 1 section DATA start: 1009 length 159 - Stream: column 1 section LENGTH start: 1168 length 32 - Stream: column 2 section DATA start: 1200 length 24 - Stream: column 2 section LENGTH start: 1224 length 6 - Stream: column 2 section DICTIONARY_DATA start: 1230 length 11 + Stripe: offset: 807 data: 206 rows: 5000 tail: 60 index: 137 + Stream: column 0 section ROW_INDEX start: 807 length 17 + Stream: column 1 section ROW_INDEX start: 824 length 60 + Stream: column 2 section ROW_INDEX start: 884 length 60 + Stream: column 1 section DATA start: 944 length 159 + Stream: column 1 section LENGTH start: 1103 length 15 + Stream: column 2 section DATA start: 1118 length 15 + Stream: column 2 section LENGTH start: 1133 length 6 + Stream: column 2 section DICTIONARY_DATA start: 1139 length 11 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[1] Row group indices for column 2: Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488 - Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464 - Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440 - Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416 - Stripe: offset: 1304 data: 202 rows: 5000 tail: 64 index: 120 - Stream: column 0 section ROW_INDEX start: 1304 length 17 - Stream: column 1 section ROW_INDEX start: 1321 length 64 - Stream: column 2 section ROW_INDEX start: 1385 length 39 - Stream: column 1 section DATA start: 1424 length 159 - Stream: column 1 section LENGTH start: 1583 length 32 - Stream: column 2 section PRESENT start: 1615 length 11 - Stream: column 2 section DATA start: 1626 length 0 - Stream: column 2 section LENGTH start: 1626 length 0 - Stream: column 2 section DICTIONARY_DATA start: 1626 length 0 + Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488 + Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464 + Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440 + Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416 + Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116 + Stream: column 0 section ROW_INDEX start: 1210 length 17 + Stream: column 1 section ROW_INDEX start: 1227 length 60 + Stream: column 2 section ROW_INDEX start: 1287 length 39 + Stream: column 1 section DATA start: 1326 length 159 + Stream: column 1 section LENGTH start: 1485 length 15 + Stream: column 2 section PRESENT start: 1500 length 11 + Stream: column 2 section DATA start: 1511 length 0 + Stream: column 2 section LENGTH start: 1511 length 0 + Stream: column 2 section DICTIONARY_DATA start: 1511 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] @@ -105,6 +105,6 @@ Stripes: Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 -File length: 1940 bytes +File length: 1823 bytes Padding length: 0 bytes Padding ratio: 0% diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out index 67aa189..7503c81 100644 --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out @@ -129,7 +129,7 @@ File Statistics: Column 11: count: 1049 hasNull: false sum: 13278 Stripes: - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944 + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943 Stream: column 0 section ROW_INDEX start: 3 length 20 Stream: column 0 section BLOOM_FILTER start: 23 length 45 Stream: column 1 section ROW_INDEX start: 68 length 58 @@ -148,30 +148,30 @@ Stripes: Stream: column 7 section BLOOM_FILTER start: 6812 length 45 Stream: column 8 section ROW_INDEX start: 6857 length 86 Stream: column 8 section BLOOM_FILTER start: 6943 length 1157 - Stream: column 9 section ROW_INDEX start: 8100 length 51 - Stream: column 9 section BLOOM_FILTER start: 8151 length 62 - Stream: column 10 section ROW_INDEX start: 8213 length 82 - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297 - Stream: column 11 section ROW_INDEX start: 9592 length 47 - Stream: column 11 section BLOOM_FILTER start: 9639 length 308 - Stream: column 1 section PRESENT start: 9947 length 17 - Stream: column 1 section DATA start: 9964 length 962 - Stream: column 2 section PRESENT start: 10926 length 17 - Stream: column 2 section DATA start: 10943 length 1441 - Stream: column 3 section DATA start: 12384 length 1704 - Stream: column 4 section DATA start: 14088 length 1998 - Stream: column 5 section DATA start: 16086 length 2925 - Stream: column 6 section DATA start: 19011 length 3323 - Stream: column 7 section DATA start: 22334 length 137 - Stream: column 8 section DATA start: 22471 length 1572 - Stream: column 8 section LENGTH start: 24043 length 310 - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548 - Stream: column 9 section DATA start: 25901 length 62 - Stream: column 9 section SECONDARY start: 25963 length 1783 - Stream: column 10 section DATA start: 27746 length 2138 - Stream: column 10 section SECONDARY start: 29884 length 231 - Stream: column 11 section DATA start: 30115 length 1877 - Stream: column 11 section LENGTH start: 31992 length 591 + Stream: column 9 section ROW_INDEX start: 8100 length 50 + Stream: column 9 section BLOOM_FILTER start: 8150 length 62 + Stream: column 10 section ROW_INDEX start: 8212 length 82 + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297 + Stream: column 11 section ROW_INDEX start: 9591 length 47 + Stream: column 11 section BLOOM_FILTER start: 9638 length 308 + Stream: column 1 section PRESENT start: 9946 length 17 + Stream: column 1 section DATA start: 9963 length 962 + Stream: column 2 section PRESENT start: 10925 length 17 + Stream: column 2 section DATA start: 10942 length 1441 + Stream: column 3 section DATA start: 12383 length 1704 + Stream: column 4 section DATA start: 14087 length 1998 + Stream: column 5 section DATA start: 16085 length 2925 + Stream: column 6 section DATA start: 19010 length 3323 + Stream: column 7 section DATA start: 22333 length 137 + Stream: column 8 section DATA start: 22470 length 1572 + Stream: column 8 section LENGTH start: 24042 length 310 + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548 + Stream: column 9 section DATA start: 25900 length 19 + Stream: column 9 section SECONDARY start: 25919 length 1783 + Stream: column 10 section DATA start: 27702 length 2138 + Stream: column 10 section SECONDARY start: 29840 length 231 + Stream: column 11 section DATA start: 30071 length 1877 + Stream: column 11 section LENGTH start: 31948 length 591 Encoding column 0: DIRECT Encoding column 1: DIRECT Encoding column 2: DIRECT_V2 @@ -192,7 +192,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 -File length: 33456 bytes +File length: 33413 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -247,7 +247,7 @@ File Statistics: Column 11: count: 1049 hasNull: false sum: 13278 Stripes: - Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096 + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 15095 Stream: column 0 section ROW_INDEX start: 3 length 20 Stream: column 0 section BLOOM_FILTER start: 23 length 56 Stream: column 1 section ROW_INDEX start: 79 length 58 @@ -266,30 +266,30 @@ Stripes: Stream: column 7 section BLOOM_FILTER start: 10385 length 56 Stream: column 8 section ROW_INDEX start: 10441 length 86 Stream: column 8 section BLOOM_FILTER start: 10527 length 1829 - Stream: column 9 section ROW_INDEX start: 12356 length 51 - Stream: column 9 section BLOOM_FILTER start: 12407 length 95 - Stream: column 10 section ROW_INDEX start: 12502 length 82 - Stream: column 10 section BLOOM_FILTER start: 12584 length 1994 - Stream: column 11 section ROW_INDEX start: 14578 length 47 - Stream: column 11 section BLOOM_FILTER start: 14625 length 474 - Stream: column 1 section PRESENT start: 15099 length 17 - Stream: column 1 section DATA start: 15116 length 962 - Stream: column 2 section PRESENT start: 16078 length 17 - Stream: column 2 section DATA start: 16095 length 1441 - Stream: column 3 section DATA start: 17536 length 1704 - Stream: column 4 section DATA start: 19240 length 1998 - Stream: column 5 section DATA start: 21238 length 2925 - Stream: column 6 section DATA start: 24163 length 3323 - Stream: column 7 section DATA start: 27486 length 137 - Stream: column 8 section DATA start: 27623 length 1572 - Stream: column 8 section LENGTH start: 29195 length 310 - Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548 - Stream: column 9 section DATA start: 31053 length 62 - Stream: column 9 section SECONDARY start: 31115 length 1783 - Stream: column 10 section DATA start: 32898 length 2138 - Stream: column 10 section SECONDARY start: 35036 length 231 - Stream: column 11 section DATA start: 35267 length 1877 - Stream: column 11 section LENGTH start: 37144 length 591 + Stream: column 9 section ROW_INDEX start: 12356 length 50 + Stream: column 9 section BLOOM_FILTER start: 12406 length 95 + Stream: column 10 section ROW_INDEX start: 12501 length 82 + Stream: column 10 section BLOOM_FILTER start: 12583 length 1994 + Stream: column 11 section ROW_INDEX start: 14577 length 47 + Stream: column 11 section BLOOM_FILTER start: 14624 length 474 + Stream: column 1 section PRESENT start: 15098 length 17 + Stream: column 1 section DATA start: 15115 length 962 + Stream: column 2 section PRESENT start: 16077 length 17 + Stream: column 2 section DATA start: 16094 length 1441 + Stream: column 3 section DATA start: 17535 length 1704 + Stream: column 4 section DATA start: 19239 length 1998 + Stream: column 5 section DATA start: 21237 length 2925 + Stream: column 6 section DATA start: 24162 length 3323 + Stream: column 7 section DATA start: 27485 length 137 + Stream: column 8 section DATA start: 27622 length 1572 + Stream: column 8 section LENGTH start: 29194 length 310 + Stream: column 8 section DICTIONARY_DATA start: 29504 length 1548 + Stream: column 9 section DATA start: 31052 length 19 + Stream: column 9 section SECONDARY start: 31071 length 1783 + Stream: column 10 section DATA start: 32854 length 2138 + Stream: column 10 section SECONDARY start: 34992 length 231 + Stream: column 11 section DATA start: 35223 length 1877 + Stream: column 11 section LENGTH start: 37100 length 591 Encoding column 0: DIRECT Encoding column 1: DIRECT Encoding column 2: DIRECT_V2 @@ -310,7 +310,7 @@ Stripes: Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8 -File length: 38610 bytes +File length: 38565 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -377,7 +377,7 @@ File Statistics: Column 11: count: 1049 hasNull: false sum: 13278 Stripes: - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944 + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943 Stream: column 0 section ROW_INDEX start: 3 length 20 Stream: column 0 section BLOOM_FILTER start: 23 length 45 Stream: column 1 section ROW_INDEX start: 68 length 58 @@ -396,30 +396,30 @@ Stripes: Stream: column 7 section BLOOM_FILTER start: 6812 length 45 Stream: column 8 section ROW_INDEX start: 6857 length 86 Stream: column 8 section BLOOM_FILTER start: 6943 length 1157 - Stream: column 9 section ROW_INDEX start: 8100 length 51 - Stream: column 9 section BLOOM_FILTER start: 8151 length 62 - Stream: column 10 section ROW_INDEX start: 8213 length 82 - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297 - Stream: column 11 section ROW_INDEX start: 9592 length 47 - Stream: column 11 section BLOOM_FILTER start: 9639 length 308 - Stream: column 1 section PRESENT start: 9947 length 17 - Stream: column 1 section DATA start: 9964 length 962 - Stream: column 2 section PRESENT start: 10926 length 17 - Stream: column 2 section DATA start: 10943 length 1441 - Stream: column 3 section DATA start: 12384 length 1704 - Stream: column 4 section DATA start: 14088 length 1998 - Stream: column 5 section DATA start: 16086 length 2925 - Stream: column 6 section DATA start: 19011 length 3323 - Stream: column 7 section DATA start: 22334 length 137 - Stream: column 8 section DATA start: 22471 length 1572 - Stream: column 8 section LENGTH start: 24043 length 310 - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548 - Stream: column 9 section DATA start: 25901 length 62 - Stream: column 9 section SECONDARY start: 25963 length 1783 - Stream: column 10 section DATA start: 27746 length 2138 - Stream: column 10 section SECONDARY start: 29884 length 231 - Stream: column 11 section DATA start: 30115 length 1877 - Stream: column 11 section LENGTH start: 31992 length 591 + Stream: column 9 section ROW_INDEX start: 8100 length 50 + Stream: column 9 section BLOOM_FILTER start: 8150 length 62 + Stream: column 10 section ROW_INDEX start: 8212 length 82 + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297 + Stream: column 11 section ROW_INDEX start: 9591 length 47 + Stream: column 11 section BLOOM_FILTER start: 9638 length 308 + Stream: column 1 section PRESENT start: 9946 length 17 + Stream: column 1 section DATA start: 9963 length 962 + Stream: column 2 section PRESENT start: 10925 length 17 + Stream: column 2 section DATA start: 10942 length 1441 + Stream: column 3 section DATA start: 12383 length 1704 + Stream: column 4 section DATA start: 14087 length 1998 + Stream: column 5 section DATA start: 16085 length 2925 + Stream: column 6 section DATA start: 19010 length 3323 + Stream: column 7 section DATA start: 22333 length 137 + Stream: column 8 section DATA start: 22470 length 1572 + Stream: column 8 section LENGTH start: 24042 length 310 + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548 + Stream: column 9 section DATA start: 25900 length 19 + Stream: column 9 section SECONDARY start: 25919 length 1783 + Stream: column 10 section DATA start: 27702 length 2138 + Stream: column 10 section SECONDARY start: 29840 length 231 + Stream: column 11 section DATA start: 30071 length 1877 + Stream: column 11 section LENGTH start: 31948 length 591 Encoding column 0: DIRECT Encoding column 1: DIRECT Encoding column 2: DIRECT_V2 @@ -440,7 +440,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 -File length: 33456 bytes +File length: 33413 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out index da608db..1c4eb0a 100644 --- a/ql/src/test/results/clientpositive/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/orc_merge11.q.out @@ -72,11 +72,11 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_4243 +File Version: 0.12 with HIVE_8732 Rows: 50000 Compression: ZLIB Compression size: 4096 -Type: struct +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp> Stripe Statistics: Stripe 1: @@ -96,22 +96,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -120,22 +120,22 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6828 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_4243 +File Version: 0.12 with HIVE_8732 Rows: 50000 Compression: ZLIB Compression size: 4096 -Type: struct +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp> Stripe Statistics: Stripe 1: @@ -155,22 +155,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -179,12 +179,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6828 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -213,11 +213,11 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_4243 +File Version: 0.12 with HIVE_8732 Rows: 100000 Compression: ZLIB Compression size: 4096 -Type: struct +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp> Stripe Statistics: Stripe 1: @@ -244,22 +244,22 @@ File Statistics: Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -268,26 +268,26 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509 - Stream: column 0 section ROW_INDEX start: 10733 length 17 - Stream: column 1 section ROW_INDEX start: 10750 length 85 - Stream: column 2 section ROW_INDEX start: 10835 length 87 - Stream: column 3 section ROW_INDEX start: 10922 length 111 - Stream: column 4 section ROW_INDEX start: 11033 length 108 - Stream: column 5 section ROW_INDEX start: 11141 length 101 - Stream: column 1 section DATA start: 11242 length 871 - Stream: column 2 section DATA start: 12113 length 362 - Stream: column 2 section LENGTH start: 12475 length 8 - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23 - Stream: column 3 section DATA start: 12506 length 5167 - Stream: column 4 section DATA start: 17673 length 524 - Stream: column 4 section SECONDARY start: 18197 length 118 - Stream: column 5 section DATA start: 18315 length 2913 - Stream: column 5 section SECONDARY start: 21228 length 118 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498 + Stream: column 0 section ROW_INDEX start: 6511 length 17 + Stream: column 1 section ROW_INDEX start: 6528 length 83 + Stream: column 2 section ROW_INDEX start: 6611 length 81 + Stream: column 3 section ROW_INDEX start: 6692 length 111 + Stream: column 4 section ROW_INDEX start: 6803 length 110 + Stream: column 5 section ROW_INDEX start: 6913 length 96 + Stream: column 1 section DATA start: 7009 length 45 + Stream: column 2 section DATA start: 7054 length 41 + Stream: column 2 section LENGTH start: 7095 length 8 + Stream: column 2 section DICTIONARY_DATA start: 7103 length 23 + Stream: column 3 section DATA start: 7126 length 5167 + Stream: column 4 section DATA start: 12293 length 524 + Stream: column 4 section SECONDARY start: 12817 length 18 + Stream: column 5 section DATA start: 12835 length 53 + Stream: column 5 section SECONDARY start: 12888 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -296,12 +296,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 21814 bytes +File length: 13348 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out index da608db..1c4eb0a 100644 --- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out @@ -72,11 +72,11 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_4243 +File Version: 0.12 with HIVE_8732 Rows: 50000 Compression: ZLIB Compression size: 4096 -Type: struct +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp> Stripe Statistics: Stripe 1: @@ -96,22 +96,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -120,22 +120,22 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6828 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_4243 +File Version: 0.12 with HIVE_8732 Rows: 50000 Compression: ZLIB Compression size: 4096 -Type: struct +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp> Stripe Statistics: Stripe 1: @@ -155,22 +155,22 @@ File Statistics: Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -179,12 +179,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 11071 bytes +File length: 6828 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP -- @@ -213,11 +213,11 @@ PREHOOK: Input: default@orcfile_merge1 #### A masked pattern was here #### -- BEGIN ORC FILE DUMP -- #### A masked pattern was here #### -File Version: 0.12 with HIVE_4243 +File Version: 0.12 with HIVE_8732 Rows: 100000 Compression: ZLIB Compression size: 4096 -Type: struct +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp> Stripe Statistics: Stripe 1: @@ -244,22 +244,22 @@ File Statistics: Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0 Stripes: - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509 + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 85 - Stream: column 2 section ROW_INDEX start: 105 length 87 - Stream: column 3 section ROW_INDEX start: 192 length 111 - Stream: column 4 section ROW_INDEX start: 303 length 108 - Stream: column 5 section ROW_INDEX start: 411 length 101 - Stream: column 1 section DATA start: 512 length 871 - Stream: column 2 section DATA start: 1383 length 362 - Stream: column 2 section LENGTH start: 1745 length 8 - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23 - Stream: column 3 section DATA start: 1776 length 5167 - Stream: column 4 section DATA start: 6943 length 524 - Stream: column 4 section SECONDARY start: 7467 length 118 - Stream: column 5 section DATA start: 7585 length 2913 - Stream: column 5 section SECONDARY start: 10498 length 118 + Stream: column 1 section ROW_INDEX start: 20 length 83 + Stream: column 2 section ROW_INDEX start: 103 length 81 + Stream: column 3 section ROW_INDEX start: 184 length 111 + Stream: column 4 section ROW_INDEX start: 295 length 110 + Stream: column 5 section ROW_INDEX start: 405 length 96 + Stream: column 1 section DATA start: 501 length 45 + Stream: column 2 section DATA start: 546 length 41 + Stream: column 2 section LENGTH start: 587 length 8 + Stream: column 2 section DICTIONARY_DATA start: 595 length 23 + Stream: column 3 section DATA start: 618 length 5167 + Stream: column 4 section DATA start: 5785 length 524 + Stream: column 4 section SECONDARY start: 6309 length 18 + Stream: column 5 section DATA start: 6327 length 53 + Stream: column 5 section SECONDARY start: 6380 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -268,26 +268,26 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509 - Stream: column 0 section ROW_INDEX start: 10733 length 17 - Stream: column 1 section ROW_INDEX start: 10750 length 85 - Stream: column 2 section ROW_INDEX start: 10835 length 87 - Stream: column 3 section ROW_INDEX start: 10922 length 111 - Stream: column 4 section ROW_INDEX start: 11033 length 108 - Stream: column 5 section ROW_INDEX start: 11141 length 101 - Stream: column 1 section DATA start: 11242 length 871 - Stream: column 2 section DATA start: 12113 length 362 - Stream: column 2 section LENGTH start: 12475 length 8 - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23 - Stream: column 3 section DATA start: 12506 length 5167 - Stream: column 4 section DATA start: 17673 length 524 - Stream: column 4 section SECONDARY start: 18197 length 118 - Stream: column 5 section DATA start: 18315 length 2913 - Stream: column 5 section SECONDARY start: 21228 length 118 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498 + Stream: column 0 section ROW_INDEX start: 6511 length 17 + Stream: column 1 section ROW_INDEX start: 6528 length 83 + Stream: column 2 section ROW_INDEX start: 6611 length 81 + Stream: column 3 section ROW_INDEX start: 6692 length 111 + Stream: column 4 section ROW_INDEX start: 6803 length 110 + Stream: column 5 section ROW_INDEX start: 6913 length 96 + Stream: column 1 section DATA start: 7009 length 45 + Stream: column 2 section DATA start: 7054 length 41 + Stream: column 2 section LENGTH start: 7095 length 8 + Stream: column 2 section DICTIONARY_DATA start: 7103 length 23 + Stream: column 3 section DATA start: 7126 length 5167 + Stream: column 4 section DATA start: 12293 length 524 + Stream: column 4 section SECONDARY start: 12817 length 18 + Stream: column 5 section DATA start: 12835 length 53 + Stream: column 5 section SECONDARY start: 12888 length 18 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[6] @@ -296,12 +296,12 @@ Stripes: Encoding column 5: DIRECT_V2 Row group indices for column 1: Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0 - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391 - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391 - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391 - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391 + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391 + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391 + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391 + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391 -File length: 21814 bytes +File length: 13348 bytes Padding length: 0 bytes Padding ratio: 0% -- END ORC FILE DUMP --