diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java index 5bd4599..6344a66 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java @@ -418,138 +418,120 @@ private void writeShortRepeatValues() throws IOException { private void determineEncoding() { - int idx = 0; + // we need to compute zigzag values for DIRECT encoding if we decide to + // break early for delta overflows or for shorter runs + computeZigZagLiterals(); + + zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 1.0); + + // not a big win for shorter runs to determine encoding + if (numLiterals <= MIN_REPEAT) { + encoding = EncodingType.DIRECT; + return; + } + + // DELTA encoding check // for identifying monotonic sequences - boolean isIncreasing = false; - int increasingCount = 1; - boolean isDecreasing = false; - int decreasingCount = 1; + boolean isIncreasing = true; + boolean isDecreasing = true; + this.isFixedDelta = true; - // for identifying type of delta encoding - min = literals[0]; + this.min = literals[0]; long max = literals[0]; - isFixedDelta = true; - long currDelta = 0; - - min = literals[0]; - long deltaMax = 0; - - // populate all variables to identify the encoding type - if (numLiterals >= 1) { - currDelta = literals[1] - literals[0]; - for(int i = 0; i < numLiterals; i++) { - if (i > 0 && literals[i] >= max) { - max = literals[i]; - increasingCount++; - } - - if (i > 0 && literals[i] <= min) { - min = literals[i]; - decreasingCount++; - } + final long initialDelta = literals[1] - literals[0]; + long currDelta = initialDelta; + long deltaMax = initialDelta; + this.adjDeltas[0] = initialDelta; + + for (int i = 1; i < numLiterals; i++) { + final long l1 = literals[i]; + final long l0 = literals[i - 1]; + currDelta = l1 - l0; + min = Math.min(min, l1); + max = Math.max(max, l1); + + isIncreasing &= (l0 <= l1); + isDecreasing &= (l0 >= l1); + + isFixedDelta &= (currDelta == initialDelta); + if (i > 1) { + adjDeltas[i - 1] = Math.abs(currDelta); + deltaMax = Math.max(deltaMax, adjDeltas[i - 1]); + } + } - // if delta doesn't changes then mark it as fixed delta - if (i > 0 && isFixedDelta) { - if (literals[i] - literals[i - 1] != currDelta) { - isFixedDelta = false; - } + // its faster to exit under delta overflow condition without checking for + // PATCHED_BASE condition as encoding using DIRECT is faster and has less + // overhead than PATCHED_BASE + if (!utils.isSafeSubtract(max, min)) { + encoding = EncodingType.DIRECT; + return; + } - fixedDelta = currDelta; - } + // invariant - subtracting any number from any other in the literals after + // this point won't overflow + + // if initialDelta is 0 then we cannot delta encode as we cannot identify + // the sign of deltas (increasing or decreasing) + if (initialDelta != 0) { + + // if min is equal to max then the delta is 0, this condition happens for + // fixed values run >10 which cannot be encoded with SHORT_REPEAT + if (min == max) { + assert isFixedDelta : min + "==" + max + + ", isFixedDelta cannot be false"; + assert currDelta == 0 : min + "==" + max + ", currDelta should be zero"; + fixedDelta = 0; + encoding = EncodingType.DELTA; + return; + } - // populate zigzag encoded literals - long zzEncVal = 0; - if (signed) { - zzEncVal = utils.zigzagEncode(literals[i]); - } else { - zzEncVal = literals[i]; - } - zigzagLiterals[idx] = zzEncVal; - idx++; - - // max delta value is required for computing the fixed bits - // required for delta blob in delta encoding - if (i > 0) { - if (i == 1) { - // first value preserve the sign - adjDeltas[i - 1] = literals[i] - literals[i - 1]; - } else { - adjDeltas[i - 1] = Math.abs(literals[i] - literals[i - 1]); - if (adjDeltas[i - 1] > deltaMax) { - deltaMax = adjDeltas[i - 1]; - } - } - } + if (isFixedDelta) { + assert currDelta == initialDelta + : "currDelta should be equal to initialDelta for fixed delta encoding"; + encoding = EncodingType.DELTA; + fixedDelta = currDelta; + return; } // stores the number of bits required for packing delta blob in // delta encoding bitsDeltaMax = utils.findClosestNumBits(deltaMax); - // if decreasing count equals total number of literals then the - // sequence is monotonically decreasing - if (increasingCount == 1 && decreasingCount == numLiterals) { - isDecreasing = true; - } - - // if increasing count equals total number of literals then the - // sequence is monotonically increasing - if (decreasingCount == 1 && increasingCount == numLiterals) { - isIncreasing = true; + // monotonic condition + if (isIncreasing || isDecreasing) { + encoding = EncodingType.DELTA; + return; } } - // if the sequence is both increasing and decreasing then it is not - // monotonic - if (isDecreasing && isIncreasing) { - isDecreasing = false; - isIncreasing = false; - } - - // fixed delta condition - if (isIncreasing == false && isDecreasing == false && isFixedDelta == true) { - encoding = EncodingType.DELTA; - return; - } - - // monotonic condition - if (isIncreasing || isDecreasing) { - encoding = EncodingType.DELTA; - return; - } + // PATCHED_BASE encoding check // percentile values are computed for the zigzag encoded values. if the // number of bit requirement between 90th and 100th percentile varies // beyond a threshold then we need to patch the values. if the variation - // is not significant then we can use direct or delta encoding - - double p = 0.9; - zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p); - - p = 1.0; - zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p); + // is not significant then we can use direct encoding + zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 0.9); int diffBitsLH = zzBits100p - zzBits90p; // if the difference between 90th percentile and 100th percentile fixed // bits is > 1 then we need patch the values - if (isIncreasing == false && isDecreasing == false && diffBitsLH > 1 - && isFixedDelta == false) { + if (diffBitsLH > 1) { + // patching is done only on base reduced values. // remove base from literals - for(int i = 0; i < numLiterals; i++) { + for (int i = 0; i < numLiterals; i++) { baseRedLiterals[i] = literals[i] - min; } // 95th percentile width is used to determine max allowed value // after which patching will be done - p = 0.95; - brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p); + brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95); // 100th percentile is used to compute the max patch width - p = 1.0; - brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p); + brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0); // after base reducing the values, if the difference in bits between // 95th percentile and 100th percentile value is zero then there @@ -565,19 +547,24 @@ private void determineEncoding() { encoding = EncodingType.DIRECT; return; } - } - - // if difference in bits between 95th percentile and 100th percentile is - // 0, then patch length will become 0. Hence we will fallback to direct - if (isIncreasing == false && isDecreasing == false && diffBitsLH <= 1 - && isFixedDelta == false) { + } else { + // if difference in bits between 95th percentile and 100th percentile is + // 0, then patch length will become 0. Hence we will fallback to direct encoding = EncodingType.DIRECT; return; } + } - // this should not happen - if (encoding == null) { - throw new RuntimeException("Integer encoding cannot be determined."); + private void computeZigZagLiterals() { + // populate zigzag encoded literals + long zzEncVal = 0; + for (int i = 0; i < numLiterals; i++) { + if (signed) { + zzEncVal = utils.zigzagEncode(literals[i]); + } else { + zzEncVal = literals[i]; + } + zigzagLiterals[i] = zzEncVal; } } @@ -700,7 +687,7 @@ private void clear() { patchWidth = 0; gapVsPatchList = null; min = 0; - isFixedDelta = false; + isFixedDelta = true; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java index b5380c0..b14fa7b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java @@ -1283,4 +1283,9 @@ private long readLongBE8(InStream in, int rbOffset) { + ((readBuffer[rbOffset + 7] & 255) << 0)); } + // Do not want to use Guava LongMath.checkedSubtract() here as it will throw + // ArithmeticException in case of overflow + public boolean isSafeSubtract(long left, long right) { + return (left ^ right) >= 0 | (left ^ (left - right)) >= 0; + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 5fedb62..55392c9 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -1633,7 +1633,7 @@ public void testCombinationInputFormatWithAcid() throws Exception { assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0", combineSplit.getPath(bucket).toString()); assertEquals(0, combineSplit.getOffset(bucket)); - assertEquals(227, combineSplit.getLength(bucket)); + assertEquals(225, combineSplit.getLength(bucket)); } String[] hosts = combineSplit.getLocations(); assertEquals(2, hosts.length); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java index 6d6f132..0f606a4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java @@ -335,6 +335,104 @@ public void testBasicDelta4() throws Exception { } @Test + public void testDeltaOverflow() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory + .getReflectionObjectInspector(Long.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + long[] inp = new long[]{4513343538618202719l, 4513343538618202711l, + 2911390882471569739l, + -9181829309989854913l}; + List input = Lists.newArrayList(Longs.asList(inp)); + + Writer writer = OrcFile.createWriter( + testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000) + .compress(CompressionKind.NONE).bufferSize(10000)); + for (Long l : input) { + writer.addRow(l); + } + writer.close(); + + Reader reader = OrcFile + .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); + RecordReader rows = reader.rows(); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get()); + } + } + + @Test + public void testDeltaOverflow2() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory + .getReflectionObjectInspector(Long.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + long[] inp = new long[]{Long.MAX_VALUE, 4513343538618202711l, + 2911390882471569739l, + Long.MIN_VALUE}; + List input = Lists.newArrayList(Longs.asList(inp)); + + Writer writer = OrcFile.createWriter( + testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000) + .compress(CompressionKind.NONE).bufferSize(10000)); + for (Long l : input) { + writer.addRow(l); + } + writer.close(); + + Reader reader = OrcFile + .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); + RecordReader rows = reader.rows(); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get()); + } + } + + @Test + public void testDeltaOverflow3() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory + .getReflectionObjectInspector(Long.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + long[] inp = new long[]{-4513343538618202711l, -2911390882471569739l, -2, + Long.MAX_VALUE}; + List input = Lists.newArrayList(Longs.asList(inp)); + + Writer writer = OrcFile.createWriter( + testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000) + .compress(CompressionKind.NONE).bufferSize(10000)); + for (Long l : input) { + writer.addRow(l); + } + writer.close(); + + Reader reader = OrcFile + .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); + RecordReader rows = reader.rows(); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get()); + } + } + + @Test public void testIntegerMin() throws Exception { ObjectInspector inspector; synchronized (TestOrcFile.class) { diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index b8b92b7..7aeaf9f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -1754,9 +1754,9 @@ public void testMemoryManagementV12() throws Exception { stripe.getDataLength() < 5000); } // with HIVE-7832, the dictionaries will be disabled after writing the first - // stripe as there are too many distinct values. Hence only 3 stripes as + // stripe as there are too many distinct values. Hence only 4 stripes as // compared to 25 stripes in version 0.11 (above test case) - assertEquals(3, i); + assertEquals(4, i); assertEquals(2500, reader.getNumberOfRows()); } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java index 4a49f09..b3f9cf1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java @@ -17,15 +17,18 @@ */ package org.apache.hadoop.hive.ql.io.orc; -import org.junit.Test; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.math.BigInteger; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; +import org.junit.Test; + +import com.google.common.math.LongMath; public class TestSerializationUtils { @@ -112,6 +115,47 @@ public void testBigIntegers() throws Exception { SerializationUtils.readBigInteger(fromBuffer(buffer))); } + @Test + public void testSubtractionOverflow() { + // cross check results with Guava results below + SerializationUtils utils = new SerializationUtils(); + assertEquals(false, utils.isSafeSubtract(22222222222L, Long.MIN_VALUE)); + assertEquals(false, utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE)); + assertEquals(false, utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE)); + assertEquals(true, utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L)); + assertEquals(true, utils.isSafeSubtract(0, Long.MAX_VALUE)); + assertEquals(true, utils.isSafeSubtract(Long.MIN_VALUE, 0)); + } + + @Test + public void testSubtractionOverflowGuava() { + try { + LongMath.checkedSubtract(22222222222L, Long.MIN_VALUE); + fail("expected ArithmeticException for overflow"); + } catch (ArithmeticException ex) { + assertEquals(ex.getMessage(), "overflow"); + } + + try { + LongMath.checkedSubtract(-22222222222L, Long.MAX_VALUE); + fail("expected ArithmeticException for overflow"); + } catch (ArithmeticException ex) { + assertEquals(ex.getMessage(), "overflow"); + } + + try { + LongMath.checkedSubtract(Long.MIN_VALUE, Long.MAX_VALUE); + fail("expected ArithmeticException for overflow"); + } catch (ArithmeticException ex) { + assertEquals(ex.getMessage(), "overflow"); + } + + assertEquals(-8106206116692740190L, + LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L)); + assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE)); + assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0)); + } + public static void main(String[] args) throws Exception { TestSerializationUtils test = new TestSerializationUtils(); test.testDoubles(); diff --git ql/src/test/resources/orc-file-dump-dictionary-threshold.out ql/src/test/resources/orc-file-dump-dictionary-threshold.out index 965d283..380f0e0 100644 --- ql/src/test/resources/orc-file-dump-dictionary-threshold.out +++ ql/src/test/resources/orc-file-dump-dictionary-threshold.out @@ -38,7 +38,7 @@ File Statistics: Column 3: count: 21000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 6910238 Stripes: - Stripe: offset: 3 data: 151109 rows: 5000 tail: 68 index: 704 + Stripe: offset: 3 data: 151108 rows: 5000 tail: 68 index: 704 Stream: column 0 section ROW_INDEX start: 3 length 15 Stream: column 1 section ROW_INDEX start: 18 length 156 Stream: column 2 section ROW_INDEX start: 174 length 172 @@ -46,7 +46,7 @@ Stripes: Stream: column 1 section DATA start: 707 length 20029 Stream: column 2 section DATA start: 20736 length 40035 Stream: column 3 section DATA start: 60771 length 86757 - Stream: column 3 section LENGTH start: 147528 length 4288 + Stream: column 3 section LENGTH start: 147528 length 4287 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -65,19 +65,19 @@ Stripes: Entry 4: count: 1000 min: -9216505819108477308 max: 9196474183833079923 positions: 20006,8686,416 Row group index column 3: Entry 0: count: 1000 min: Darkness,-230 max: worst-54-290-346-648-908-996 positions: 0,0,0,0,0 - Entry 1: count: 1000 min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 positions: 2777,8442,0,696,18 - Entry 2: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 positions: 13595,4780,0,1555,14 - Entry 3: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 positions: 31432,228,0,2373,90 - Entry 4: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 positions: 54111,5096,0,3355,108 - Stripe: offset: 151884 data: 336358 rows: 5000 tail: 69 index: 941 - Stream: column 0 section ROW_INDEX start: 151884 length 15 - Stream: column 1 section ROW_INDEX start: 151899 length 150 - Stream: column 2 section ROW_INDEX start: 152049 length 167 - Stream: column 3 section ROW_INDEX start: 152216 length 609 - Stream: column 1 section DATA start: 152825 length 20029 - Stream: column 2 section DATA start: 172854 length 40035 - Stream: column 3 section DATA start: 212889 length 270789 - Stream: column 3 section LENGTH start: 483678 length 5505 + Entry 1: count: 1000 min: Darkness,-230-368-488-586-862-930-1686 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966 positions: 2777,8442,0,695,18 + Entry 2: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660 positions: 13595,4780,0,1554,14 + Entry 3: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 positions: 31432,228,0,2372,90 + Entry 4: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 positions: 54111,5096,0,3354,108 + Stripe: offset: 151883 data: 336358 rows: 5000 tail: 69 index: 941 + Stream: column 0 section ROW_INDEX start: 151883 length 15 + Stream: column 1 section ROW_INDEX start: 151898 length 150 + Stream: column 2 section ROW_INDEX start: 152048 length 167 + Stream: column 3 section ROW_INDEX start: 152215 length 609 + Stream: column 1 section DATA start: 152824 length 20029 + Stream: column 2 section DATA start: 172853 length 40035 + Stream: column 3 section DATA start: 212888 length 270789 + Stream: column 3 section LENGTH start: 483677 length 5505 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -100,15 +100,15 @@ Stripes: Entry 2: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988 positions: 80822,9756,0,1945,222 Entry 3: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 positions: 137149,4496,0,3268,48 Entry 4: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 positions: 197972,6590,0,4064,342 - Stripe: offset: 489252 data: 558031 rows: 5000 tail: 69 index: 1169 - Stream: column 0 section ROW_INDEX start: 489252 length 15 - Stream: column 1 section ROW_INDEX start: 489267 length 159 - Stream: column 2 section ROW_INDEX start: 489426 length 169 - Stream: column 3 section ROW_INDEX start: 489595 length 826 - Stream: column 1 section DATA start: 490421 length 20029 - Stream: column 2 section DATA start: 510450 length 40035 - Stream: column 3 section DATA start: 550485 length 492258 - Stream: column 3 section LENGTH start: 1042743 length 5709 + Stripe: offset: 489251 data: 558031 rows: 5000 tail: 69 index: 1169 + Stream: column 0 section ROW_INDEX start: 489251 length 15 + Stream: column 1 section ROW_INDEX start: 489266 length 159 + Stream: column 2 section ROW_INDEX start: 489425 length 169 + Stream: column 3 section ROW_INDEX start: 489594 length 826 + Stream: column 1 section DATA start: 490420 length 20029 + Stream: column 2 section DATA start: 510449 length 40035 + Stream: column 3 section DATA start: 550484 length 492258 + Stream: column 3 section LENGTH start: 1042742 length 5709 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -131,15 +131,15 @@ Stripes: Entry 2: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976 positions: 170641,3422,0,2077,162 Entry 3: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 positions: 268420,9960,0,3369,16 Entry 4: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 positions: 377916,1620,0,4041,470 - Stripe: offset: 1048521 data: 792850 rows: 5000 tail: 69 index: 1351 - Stream: column 0 section ROW_INDEX start: 1048521 length 15 - Stream: column 1 section ROW_INDEX start: 1048536 length 149 - Stream: column 2 section ROW_INDEX start: 1048685 length 170 - Stream: column 3 section ROW_INDEX start: 1048855 length 1017 - Stream: column 1 section DATA start: 1049872 length 20029 - Stream: column 2 section DATA start: 1069901 length 40035 - Stream: column 3 section DATA start: 1109936 length 727038 - Stream: column 3 section LENGTH start: 1836974 length 5748 + Stripe: offset: 1048520 data: 792850 rows: 5000 tail: 69 index: 1351 + Stream: column 0 section ROW_INDEX start: 1048520 length 15 + Stream: column 1 section ROW_INDEX start: 1048535 length 149 + Stream: column 2 section ROW_INDEX start: 1048684 length 170 + Stream: column 3 section ROW_INDEX start: 1048854 length 1017 + Stream: column 1 section DATA start: 1049871 length 20029 + Stream: column 2 section DATA start: 1069900 length 40035 + Stream: column 3 section DATA start: 1109935 length 727038 + Stream: column 3 section LENGTH start: 1836973 length 5748 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -162,15 +162,15 @@ Stripes: Entry 2: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878 positions: 263111,206,0,1926,462 Entry 3: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 positions: 407371,8480,0,3444,250 Entry 4: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 positions: 562094,3058,0,4643,292 - Stripe: offset: 1842791 data: 188033 rows: 1000 tail: 67 index: 832 - Stream: column 0 section ROW_INDEX start: 1842791 length 10 - Stream: column 1 section ROW_INDEX start: 1842801 length 36 - Stream: column 2 section ROW_INDEX start: 1842837 length 39 - Stream: column 3 section ROW_INDEX start: 1842876 length 747 - Stream: column 1 section DATA start: 1843623 length 4007 - Stream: column 2 section DATA start: 1847630 length 8007 - Stream: column 3 section DATA start: 1855637 length 174759 - Stream: column 3 section LENGTH start: 2030396 length 1260 + Stripe: offset: 1842790 data: 188033 rows: 1000 tail: 67 index: 832 + Stream: column 0 section ROW_INDEX start: 1842790 length 10 + Stream: column 1 section ROW_INDEX start: 1842800 length 36 + Stream: column 2 section ROW_INDEX start: 1842836 length 39 + Stream: column 3 section ROW_INDEX start: 1842875 length 747 + Stream: column 1 section DATA start: 1843622 length 4007 + Stream: column 2 section DATA start: 1847629 length 8007 + Stream: column 3 section DATA start: 1855636 length 174759 + Stream: column 3 section LENGTH start: 2030395 length 1260 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -182,6 +182,6 @@ Stripes: Row group index column 3: Entry 0: count: 1000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 positions: 0,0,0,0,0 -File length: 2033559 bytes +File length: 2033557 bytes Padding length: 0 bytes Padding ratio: 0% diff --git ql/src/test/resources/orc-file-dump.out ql/src/test/resources/orc-file-dump.out index 40dfc11..d67b53a 100644 --- ql/src/test/resources/orc-file-dump.out +++ ql/src/test/resources/orc-file-dump.out @@ -38,16 +38,16 @@ File Statistics: Column 3: count: 21000 min: Darkness, max: worst sum: 81761 Stripes: - Stripe: offset: 3 data: 63766 rows: 5000 tail: 79 index: 428 + Stripe: offset: 3 data: 63765 rows: 5000 tail: 79 index: 428 Stream: column 0 section ROW_INDEX start: 3 length 15 Stream: column 1 section ROW_INDEX start: 18 length 158 Stream: column 2 section ROW_INDEX start: 176 length 171 Stream: column 3 section ROW_INDEX start: 347 length 84 Stream: column 1 section DATA start: 431 length 20029 Stream: column 2 section DATA start: 20460 length 40035 - Stream: column 3 section DATA start: 60495 length 3544 - Stream: column 3 section LENGTH start: 64039 length 25 - Stream: column 3 section DICTIONARY_DATA start: 64064 length 133 + Stream: column 3 section DATA start: 60495 length 3543 + Stream: column 3 section LENGTH start: 64038 length 25 + Stream: column 3 section DICTIONARY_DATA start: 64063 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -68,18 +68,18 @@ Stripes: Entry 0: count: 1000 min: Darkness, max: worst positions: 0,0,0 Entry 1: count: 1000 min: Darkness, max: worst positions: 0,659,149 Entry 2: count: 1000 min: Darkness, max: worst positions: 0,1531,3 - Entry 3: count: 1000 min: Darkness, max: worst positions: 0,2282,32 - Entry 4: count: 1000 min: Darkness, max: worst positions: 0,3034,45 - Stripe: offset: 64276 data: 63755 rows: 5000 tail: 79 index: 421 - Stream: column 0 section ROW_INDEX start: 64276 length 15 - Stream: column 1 section ROW_INDEX start: 64291 length 157 - Stream: column 2 section ROW_INDEX start: 64448 length 169 - Stream: column 3 section ROW_INDEX start: 64617 length 80 - Stream: column 1 section DATA start: 64697 length 20029 - Stream: column 2 section DATA start: 84726 length 40035 - Stream: column 3 section DATA start: 124761 length 3533 - Stream: column 3 section LENGTH start: 128294 length 25 - Stream: column 3 section DICTIONARY_DATA start: 128319 length 133 + Entry 3: count: 1000 min: Darkness, max: worst positions: 0,2281,32 + Entry 4: count: 1000 min: Darkness, max: worst positions: 0,3033,45 + Stripe: offset: 64275 data: 63754 rows: 5000 tail: 79 index: 421 + Stream: column 0 section ROW_INDEX start: 64275 length 15 + Stream: column 1 section ROW_INDEX start: 64290 length 157 + Stream: column 2 section ROW_INDEX start: 64447 length 169 + Stream: column 3 section ROW_INDEX start: 64616 length 80 + Stream: column 1 section DATA start: 64696 length 20029 + Stream: column 2 section DATA start: 84725 length 40035 + Stream: column 3 section DATA start: 124760 length 3532 + Stream: column 3 section LENGTH start: 128292 length 25 + Stream: column 3 section DICTIONARY_DATA start: 128317 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -101,17 +101,17 @@ Stripes: Entry 1: count: 1000 min: Darkness, max: worst positions: 0,761,12 Entry 2: count: 1000 min: Darkness, max: worst positions: 0,1472,70 Entry 3: count: 1000 min: Darkness, max: worst positions: 0,2250,43 - Entry 4: count: 1000 min: Darkness, max: worst positions: 0,2979,88 - Stripe: offset: 128531 data: 63766 rows: 5000 tail: 79 index: 422 - Stream: column 0 section ROW_INDEX start: 128531 length 15 - Stream: column 1 section ROW_INDEX start: 128546 length 153 - Stream: column 2 section ROW_INDEX start: 128699 length 169 - Stream: column 3 section ROW_INDEX start: 128868 length 85 - Stream: column 1 section DATA start: 128953 length 20029 - Stream: column 2 section DATA start: 148982 length 40035 - Stream: column 3 section DATA start: 189017 length 3544 - Stream: column 3 section LENGTH start: 192561 length 25 - Stream: column 3 section DICTIONARY_DATA start: 192586 length 133 + Entry 4: count: 1000 min: Darkness, max: worst positions: 0,2978,88 + Stripe: offset: 128529 data: 63766 rows: 5000 tail: 79 index: 422 + Stream: column 0 section ROW_INDEX start: 128529 length 15 + Stream: column 1 section ROW_INDEX start: 128544 length 153 + Stream: column 2 section ROW_INDEX start: 128697 length 169 + Stream: column 3 section ROW_INDEX start: 128866 length 85 + Stream: column 1 section DATA start: 128951 length 20029 + Stream: column 2 section DATA start: 148980 length 40035 + Stream: column 3 section DATA start: 189015 length 3544 + Stream: column 3 section LENGTH start: 192559 length 25 + Stream: column 3 section DICTIONARY_DATA start: 192584 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -134,16 +134,16 @@ Stripes: Entry 2: count: 1000 min: Darkness, max: worst positions: 0,1469,69 Entry 3: count: 1000 min: Darkness, max: worst positions: 0,2133,194 Entry 4: count: 1000 min: Darkness, max: worst positions: 0,3005,43 - Stripe: offset: 192798 data: 63796 rows: 5000 tail: 79 index: 425 - Stream: column 0 section ROW_INDEX start: 192798 length 15 - Stream: column 1 section ROW_INDEX start: 192813 length 156 - Stream: column 2 section ROW_INDEX start: 192969 length 168 - Stream: column 3 section ROW_INDEX start: 193137 length 86 - Stream: column 1 section DATA start: 193223 length 20029 - Stream: column 2 section DATA start: 213252 length 40035 - Stream: column 3 section DATA start: 253287 length 3574 - Stream: column 3 section LENGTH start: 256861 length 25 - Stream: column 3 section DICTIONARY_DATA start: 256886 length 133 + Stripe: offset: 192796 data: 63796 rows: 5000 tail: 79 index: 425 + Stream: column 0 section ROW_INDEX start: 192796 length 15 + Stream: column 1 section ROW_INDEX start: 192811 length 156 + Stream: column 2 section ROW_INDEX start: 192967 length 168 + Stream: column 3 section ROW_INDEX start: 193135 length 86 + Stream: column 1 section DATA start: 193221 length 20029 + Stream: column 2 section DATA start: 213250 length 40035 + Stream: column 3 section DATA start: 253285 length 3574 + Stream: column 3 section LENGTH start: 256859 length 25 + Stream: column 3 section DICTIONARY_DATA start: 256884 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -166,16 +166,16 @@ Stripes: Entry 2: count: 1000 min: Darkness, max: worst positions: 0,1485,52 Entry 3: count: 1000 min: Darkness, max: worst positions: 0,2196,104 Entry 4: count: 1000 min: Darkness, max: worst positions: 0,2934,131 - Stripe: offset: 257098 data: 12940 rows: 1000 tail: 71 index: 123 - Stream: column 0 section ROW_INDEX start: 257098 length 10 - Stream: column 1 section ROW_INDEX start: 257108 length 36 - Stream: column 2 section ROW_INDEX start: 257144 length 39 - Stream: column 3 section ROW_INDEX start: 257183 length 38 - Stream: column 1 section DATA start: 257221 length 4007 - Stream: column 2 section DATA start: 261228 length 8007 - Stream: column 3 section DATA start: 269235 length 768 - Stream: column 3 section LENGTH start: 270003 length 25 - Stream: column 3 section DICTIONARY_DATA start: 270028 length 133 + Stripe: offset: 257096 data: 12940 rows: 1000 tail: 71 index: 123 + Stream: column 0 section ROW_INDEX start: 257096 length 10 + Stream: column 1 section ROW_INDEX start: 257106 length 36 + Stream: column 2 section ROW_INDEX start: 257142 length 39 + Stream: column 3 section ROW_INDEX start: 257181 length 38 + Stream: column 1 section DATA start: 257219 length 4007 + Stream: column 2 section DATA start: 261226 length 8007 + Stream: column 3 section DATA start: 269233 length 768 + Stream: column 3 section LENGTH start: 270001 length 25 + Stream: column 3 section DICTIONARY_DATA start: 270026 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -187,6 +187,6 @@ Stripes: Row group index column 3: Entry 0: count: 1000 min: Darkness, max: worst positions: 0,0,0 -File length: 270759 bytes +File length: 270760 bytes Padding length: 0 bytes Padding ratio: 0% diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 6262d37..c11bf61 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -98,11 +98,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 722 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 722 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for specific parition @@ -135,11 +135,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 323 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 323 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE @@ -158,11 +158,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 722 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 722 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -222,11 +222,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -245,11 +245,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -268,11 +268,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- both partitions will be pruned @@ -331,14 +331,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: zip (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -366,7 +366,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -403,7 +403,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 @@ -473,14 +473,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -508,11 +508,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- This is to test filter expression evaluation on partition column diff --git ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out index e0b9bd1..04cf000 100644 --- ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out @@ -924,7 +924,7 @@ Partition Parameters: numFiles 2 numRows 3 rawDataSize 60 - totalSize 1034 + totalSize 1050 #### A masked pattern was here #### # Storage Information @@ -1012,7 +1012,7 @@ Partition Parameters: numFiles 2 numRows 3 rawDataSize 60 - totalSize 1040 + totalSize 1050 #### A masked pattern was here #### # Storage Information @@ -1055,7 +1055,7 @@ Partition Parameters: numFiles 8 numRows 16 rawDataSize 320 - totalSize 4332 + totalSize 4340 #### A masked pattern was here #### # Storage Information @@ -1141,7 +1141,7 @@ Partition Parameters: numFiles 8 numRows 16 rawDataSize 320 - totalSize 4318 + totalSize 4326 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index ef63e74..ffcd262 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -170,7 +170,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 342 + totalSize 343 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -215,7 +215,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 364 + totalSize 361 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -338,7 +338,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 342 + totalSize 343 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -383,7 +383,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 364 + totalSize 361 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -573,7 +573,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 260 + totalSize 258 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -665,7 +665,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -711,7 +711,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -833,7 +833,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 260 + totalSize 258 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -925,7 +925,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -971,7 +971,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index cbe210b..1d7391a 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -187,7 +187,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 342 + totalSize 343 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -232,7 +232,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 364 + totalSize 362 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -447,7 +447,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 342 + totalSize 343 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -492,7 +492,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 364 + totalSize 362 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -720,7 +720,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 342 + totalSize 343 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -765,7 +765,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 364 + totalSize 362 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -976,7 +976,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 342 + totalSize 343 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1021,7 +1021,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 364 + totalSize 362 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1304,7 +1304,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 264 + totalSize 262 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1350,7 +1350,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 278 + totalSize 276 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1396,7 +1396,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 280 + totalSize 277 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1442,7 +1442,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 260 + totalSize 258 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1488,7 +1488,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 257 + totalSize 255 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1534,7 +1534,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 269 + totalSize 265 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1580,7 +1580,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 278 + totalSize 274 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1626,7 +1626,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1672,7 +1672,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1764,7 +1764,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 271 + totalSize 270 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1893,7 +1893,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 264 + totalSize 262 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1939,7 +1939,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 278 + totalSize 276 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1985,7 +1985,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 280 + totalSize 277 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -2031,7 +2031,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 260 + totalSize 258 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -2077,7 +2077,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 257 + totalSize 255 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -2123,7 +2123,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 269 + totalSize 265 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -2169,7 +2169,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 278 + totalSize 274 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -2215,7 +2215,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -2261,7 +2261,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 247 + totalSize 245 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -2353,7 +2353,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_2d { string state, i32 locid} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 271 + totalSize 270 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde diff --git ql/src/test/results/clientpositive/orc_analyze.q.out ql/src/test/results/clientpositive/orc_analyze.q.out index 6e22f97..07e46e9 100644 --- ql/src/test/results/clientpositive/orc_analyze.q.out +++ ql/src/test/results/clientpositive/orc_analyze.q.out @@ -105,7 +105,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3123 + totalSize 3121 #### A masked pattern was here #### # Storage Information @@ -195,7 +195,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3123 + totalSize 3121 #### A masked pattern was here #### # Storage Information @@ -621,7 +621,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21980 - totalSize 4963 + totalSize 4959 #### A masked pattern was here #### # Storage Information @@ -666,7 +666,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22048 - totalSize 5051 + totalSize 5044 #### A masked pattern was here #### # Storage Information @@ -774,7 +774,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21980 - totalSize 4963 + totalSize 4959 #### A masked pattern was here #### # Storage Information @@ -819,7 +819,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22048 - totalSize 5051 + totalSize 5044 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/orc_merge1.q.out ql/src/test/results/clientpositive/orc_merge1.q.out index 07bcdfa..f38709b 100644 --- ql/src/test/results/clientpositive/orc_merge1.q.out +++ ql/src/test/results/clientpositive/orc_merge1.q.out @@ -139,7 +139,7 @@ Partition Parameters: numFiles 2 numRows 242 rawDataSize 22748 - totalSize 1750 + totalSize 1747 #### A masked pattern was here #### # Storage Information @@ -295,7 +295,7 @@ Partition Parameters: numFiles 1 numRows 242 rawDataSize 22748 - totalSize 1335 + totalSize 1332 #### A masked pattern was here #### # Storage Information @@ -441,7 +441,7 @@ Partition Parameters: numFiles 1 numRows 242 rawDataSize 22748 - totalSize 1626 + totalSize 1623 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/orc_merge5.q.out ql/src/test/results/clientpositive/orc_merge5.q.out index a71edce..2ac3342 100644 --- ql/src/test/results/clientpositive/orc_merge5.q.out +++ ql/src/test/results/clientpositive/orc_merge5.q.out @@ -121,7 +121,7 @@ Table Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -274,7 +274,7 @@ Table Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -343,7 +343,7 @@ Table Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -434,7 +434,7 @@ Table Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/orc_merge6.q.out ql/src/test/results/clientpositive/orc_merge6.q.out index 69cf6f4..05deb57 100644 --- ql/src/test/results/clientpositive/orc_merge6.q.out +++ ql/src/test/results/clientpositive/orc_merge6.q.out @@ -155,7 +155,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -200,7 +200,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -400,7 +400,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -445,7 +445,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -558,7 +558,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -603,7 +603,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -736,7 +736,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -781,7 +781,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/orc_merge7.q.out ql/src/test/results/clientpositive/orc_merge7.q.out index f6058fe..d342736 100644 --- ql/src/test/results/clientpositive/orc_merge7.q.out +++ ql/src/test/results/clientpositive/orc_merge7.q.out @@ -187,7 +187,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -231,7 +231,7 @@ Partition Parameters: numFiles 2 numRows 2 rawDataSize 510 - totalSize 1058 + totalSize 1044 #### A masked pattern was here #### # Storage Information @@ -464,7 +464,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -508,7 +508,7 @@ Partition Parameters: numFiles 1 numRows 2 rawDataSize 510 - totalSize 852 + totalSize 838 #### A masked pattern was here #### # Storage Information @@ -659,7 +659,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -703,7 +703,7 @@ Partition Parameters: numFiles 2 numRows 2 rawDataSize 510 - totalSize 1058 + totalSize 1044 #### A masked pattern was here #### # Storage Information @@ -837,7 +837,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -881,7 +881,7 @@ Partition Parameters: numFiles 1 numRows 2 rawDataSize 510 - totalSize 852 + totalSize 838 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/orc_merge_incompat1.q.out index e2d634b..e6ef838 100644 --- ql/src/test/results/clientpositive/orc_merge_incompat1.q.out +++ ql/src/test/results/clientpositive/orc_merge_incompat1.q.out @@ -172,7 +172,7 @@ Table Parameters: numFiles 5 numRows 15 rawDataSize 3825 - totalSize 2862 + totalSize 2877 #### A masked pattern was here #### # Storage Information @@ -250,7 +250,7 @@ Table Parameters: numFiles 3 numRows 15 rawDataSize 3825 - totalSize 2325 + totalSize 2340 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/orc_merge_incompat2.q.out ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index c32fbf6..e28d8b3 100644 --- ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -245,7 +245,7 @@ Partition Parameters: numFiles 4 numRows 4 rawDataSize 1020 - totalSize 2092 + totalSize 2060 #### A masked pattern was here #### # Storage Information @@ -289,7 +289,7 @@ Partition Parameters: numFiles 4 numRows 8 rawDataSize 2040 - totalSize 2204 + totalSize 2188 #### A masked pattern was here #### # Storage Information @@ -430,7 +430,7 @@ Partition Parameters: numFiles 3 numRows 4 rawDataSize 1020 - totalSize 1851 + totalSize 1819 #### A masked pattern was here #### # Storage Information @@ -474,7 +474,7 @@ Partition Parameters: numFiles 3 numRows 8 rawDataSize 2040 - totalSize 1944 + totalSize 1928 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index f0bf08d..53dfb2c 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -968,7 +968,7 @@ Partition Parameters: numFiles 2 numRows 3 rawDataSize 60 - totalSize 1034 + totalSize 1050 #### A masked pattern was here #### # Storage Information @@ -1056,7 +1056,7 @@ Partition Parameters: numFiles 2 numRows 3 rawDataSize 60 - totalSize 1040 + totalSize 1050 #### A masked pattern was here #### # Storage Information @@ -1099,7 +1099,7 @@ Partition Parameters: numFiles 8 numRows 16 rawDataSize 320 - totalSize 4332 + totalSize 4340 #### A masked pattern was here #### # Storage Information @@ -1185,7 +1185,7 @@ Partition Parameters: numFiles 8 numRows 16 rawDataSize 320 - totalSize 4318 + totalSize 4326 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/orc_analyze.q.out ql/src/test/results/clientpositive/tez/orc_analyze.q.out index 6e22f97..07e46e9 100644 --- ql/src/test/results/clientpositive/tez/orc_analyze.q.out +++ ql/src/test/results/clientpositive/tez/orc_analyze.q.out @@ -105,7 +105,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3123 + totalSize 3121 #### A masked pattern was here #### # Storage Information @@ -195,7 +195,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3123 + totalSize 3121 #### A masked pattern was here #### # Storage Information @@ -621,7 +621,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21980 - totalSize 4963 + totalSize 4959 #### A masked pattern was here #### # Storage Information @@ -666,7 +666,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22048 - totalSize 5051 + totalSize 5044 #### A masked pattern was here #### # Storage Information @@ -774,7 +774,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21980 - totalSize 4963 + totalSize 4959 #### A masked pattern was here #### # Storage Information @@ -819,7 +819,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22048 - totalSize 5051 + totalSize 5044 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/orc_merge1.q.out ql/src/test/results/clientpositive/tez/orc_merge1.q.out index cab357a..c87b187 100644 --- ql/src/test/results/clientpositive/tez/orc_merge1.q.out +++ ql/src/test/results/clientpositive/tez/orc_merge1.q.out @@ -146,7 +146,7 @@ Partition Parameters: numFiles 6 numRows 242 rawDataSize 22748 - totalSize 3046 + totalSize 3037 #### A masked pattern was here #### # Storage Information @@ -315,7 +315,7 @@ Partition Parameters: numFiles 1 numRows 242 rawDataSize 22748 - totalSize 1328 + totalSize 1325 #### A masked pattern was here #### # Storage Information @@ -476,7 +476,7 @@ Partition Parameters: numFiles 1 numRows 242 rawDataSize 22748 - totalSize 2401 + totalSize 2392 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/orc_merge5.q.out ql/src/test/results/clientpositive/tez/orc_merge5.q.out index 27e3b31..b40a37d 100644 --- ql/src/test/results/clientpositive/tez/orc_merge5.q.out +++ ql/src/test/results/clientpositive/tez/orc_merge5.q.out @@ -128,7 +128,7 @@ Table Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -296,7 +296,7 @@ Table Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -365,7 +365,7 @@ Table Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -456,7 +456,7 @@ Table Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/orc_merge6.q.out ql/src/test/results/clientpositive/tez/orc_merge6.q.out index 575564e..0441fa4 100644 --- ql/src/test/results/clientpositive/tez/orc_merge6.q.out +++ ql/src/test/results/clientpositive/tez/orc_merge6.q.out @@ -162,7 +162,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -207,7 +207,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -422,7 +422,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -467,7 +467,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -580,7 +580,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -625,7 +625,7 @@ Partition Parameters: numFiles 3 numRows 3 rawDataSize 765 - totalSize 1141 + totalSize 1133 #### A masked pattern was here #### # Storage Information @@ -758,7 +758,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information @@ -803,7 +803,7 @@ Partition Parameters: numFiles 1 numRows 3 rawDataSize 765 - totalSize 907 + totalSize 899 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/orc_merge7.q.out ql/src/test/results/clientpositive/tez/orc_merge7.q.out index 011f279..c6809a1 100644 --- ql/src/test/results/clientpositive/tez/orc_merge7.q.out +++ ql/src/test/results/clientpositive/tez/orc_merge7.q.out @@ -194,7 +194,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -238,7 +238,7 @@ Partition Parameters: numFiles 2 numRows 2 rawDataSize 510 - totalSize 1058 + totalSize 1044 #### A masked pattern was here #### # Storage Information @@ -486,7 +486,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -530,7 +530,7 @@ Partition Parameters: numFiles 1 numRows 2 rawDataSize 510 - totalSize 852 + totalSize 838 #### A masked pattern was here #### # Storage Information @@ -681,7 +681,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -725,7 +725,7 @@ Partition Parameters: numFiles 2 numRows 2 rawDataSize 510 - totalSize 1058 + totalSize 1044 #### A masked pattern was here #### # Storage Information @@ -859,7 +859,7 @@ Partition Parameters: numFiles 1 numRows 1 rawDataSize 255 - totalSize 521 + totalSize 513 #### A masked pattern was here #### # Storage Information @@ -903,7 +903,7 @@ Partition Parameters: numFiles 1 numRows 2 rawDataSize 510 - totalSize 852 + totalSize 838 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out index 2bf4634..90f7f24 100644 --- ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out +++ ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out @@ -179,7 +179,7 @@ Table Parameters: numFiles 5 numRows 15 rawDataSize 3825 - totalSize 2862 + totalSize 2877 #### A masked pattern was here #### # Storage Information @@ -257,7 +257,7 @@ Table Parameters: numFiles 3 numRows 15 rawDataSize 3825 - totalSize 2325 + totalSize 2340 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/orc_merge_incompat2.q.out ql/src/test/results/clientpositive/tez/orc_merge_incompat2.q.out index 4d21749..30c6ab8 100644 --- ql/src/test/results/clientpositive/tez/orc_merge_incompat2.q.out +++ ql/src/test/results/clientpositive/tez/orc_merge_incompat2.q.out @@ -252,7 +252,7 @@ Partition Parameters: numFiles 4 numRows 4 rawDataSize 1020 - totalSize 2092 + totalSize 2060 #### A masked pattern was here #### # Storage Information @@ -296,7 +296,7 @@ Partition Parameters: numFiles 4 numRows 8 rawDataSize 2040 - totalSize 2204 + totalSize 2188 #### A masked pattern was here #### # Storage Information @@ -437,7 +437,7 @@ Partition Parameters: numFiles 3 numRows 4 rawDataSize 1020 - totalSize 1851 + totalSize 1819 #### A masked pattern was here #### # Storage Information @@ -481,7 +481,7 @@ Partition Parameters: numFiles 3 numRows 8 rawDataSize 2040 - totalSize 1944 + totalSize 1928 #### A masked pattern was here #### # Storage Information