diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json index 14cf962..646dfe5 100644 --- a/ql/src/test/resources/orc-file-dump.json +++ b/ql/src/test/resources/orc-file-dump.json @@ -4,7 +4,7 @@ "writerVersion": "HIVE_4243", "numberOfRows": 21000, "compression": "ZLIB", - "compressionBufferSize": 10000, + "compressionBufferSize": 4096, "schemaString": "struct", "schema": [ { @@ -254,8 +254,8 @@ "stripeNumber": 1, "stripeInformation": { "offset": 3, - "indexLength": 863, - "dataLength": 63749, + "indexLength": 970, + "dataLength": 63770, "footerLength": 90, "rowCount": 5000 }, @@ -270,60 +270,60 @@ "columnId": 1, "section": "ROW_INDEX", "startOffset": 20, - "length": 165 + "length": 167 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 185, - "length": 174 + "startOffset": 187, + "length": 171 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 359, + "startOffset": 358, "length": 103 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 462, - "length": 404 + "startOffset": 461, + "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 866, - "length": 20029 + "startOffset": 973, + "length": 20035 }, { "columnId": 2, "section": "DATA", - "startOffset": 20895, - "length": 40035 + "startOffset": 21008, + "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 60930, + "startOffset": 61058, "length": 17 }, { "columnId": 3, "section": "DATA", - "startOffset": 60947, + "startOffset": 61075, "length": 3510 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 64457, + "startOffset": 64585, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 64482, + "startOffset": 64610, "length": 133 } ], @@ -494,77 +494,77 @@ { "stripeNumber": 2, "stripeInformation": { - "offset": 64705, - "indexLength": 854, - "dataLength": 63742, - "footerLength": 90, + "offset": 64833, + "indexLength": 961, + "dataLength": 63763, + "footerLength": 88, "rowCount": 5000 }, "streams": [ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 64705, + "startOffset": 64833, "length": 17 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 64722, - "length": 164 + "startOffset": 64850, + "length": 166 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 64886, - "length": 169 + "startOffset": 65016, + "length": 166 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 65055, + "startOffset": 65182, "length": 100 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 65155, - "length": 404 + "startOffset": 65282, + "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 65559, - "length": 20029 + "startOffset": 65794, + "length": 20035 }, { "columnId": 2, "section": "DATA", - "startOffset": 85588, - "length": 40035 + "startOffset": 85829, + "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 125623, + "startOffset": 125879, "length": 17 }, { "columnId": 3, "section": "DATA", - "startOffset": 125640, + "startOffset": 125896, "length": 3503 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 129143, + "startOffset": 129399, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 129168, + "startOffset": 129424, "length": 133 } ], @@ -735,77 +735,77 @@ { "stripeNumber": 3, "stripeInformation": { - "offset": 129391, - "indexLength": 853, - "dataLength": 63749, - "footerLength": 90, + "offset": 129645, + "indexLength": 962, + "dataLength": 63770, + "footerLength": 91, "rowCount": 5000 }, "streams": [ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 129391, + "startOffset": 129645, "length": 17 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 129408, - "length": 160 + "startOffset": 129662, + "length": 164 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 129568, - "length": 170 + "startOffset": 129826, + "length": 167 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 129738, + "startOffset": 129993, "length": 102 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 129840, - "length": 404 + "startOffset": 130095, + "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 130244, - "length": 20029 + "startOffset": 130607, + "length": 20035 }, { "columnId": 2, "section": "DATA", - "startOffset": 150273, - "length": 40035 + "startOffset": 150642, + "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 190308, + "startOffset": 190692, "length": 17 }, { "columnId": 3, "section": "DATA", - "startOffset": 190325, + "startOffset": 190709, "length": 3510 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 193835, + "startOffset": 194219, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 193860, + "startOffset": 194244, "length": 133 } ], @@ -976,77 +976,77 @@ { "stripeNumber": 4, "stripeInformation": { - "offset": 194083, - "indexLength": 866, - "dataLength": 63735, - "footerLength": 90, + "offset": 194468, + "indexLength": 973, + "dataLength": 63756, + "footerLength": 91, "rowCount": 5000 }, "streams": [ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 194083, + "startOffset": 194468, "length": 17 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 194100, - "length": 164 + "startOffset": 194485, + "length": 166 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 194264, - "length": 174 + "startOffset": 194651, + "length": 171 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 194438, + "startOffset": 194822, "length": 107 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 194545, - "length": 404 + "startOffset": 194929, + "length": 512 }, { "columnId": 1, "section": "DATA", - "startOffset": 194949, - "length": 20029 + "startOffset": 195441, + "length": 20035 }, { "columnId": 2, "section": "DATA", - "startOffset": 214978, - "length": 40035 + "startOffset": 215476, + "length": 40050 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 255013, + "startOffset": 255526, "length": 17 }, { "columnId": 3, "section": "DATA", - "startOffset": 255030, + "startOffset": 255543, "length": 3496 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 258526, + "startOffset": 259039, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 258551, + "startOffset": 259064, "length": 133 } ], @@ -1217,9 +1217,9 @@ { "stripeNumber": 5, "stripeInformation": { - "offset": 258774, + "offset": 259288, "indexLength": 433, - "dataLength": 12940, + "dataLength": 12943, "footerLength": 83, "rowCount": 1000 }, @@ -1227,67 +1227,67 @@ { "columnId": 0, "section": "ROW_INDEX", - "startOffset": 258774, + "startOffset": 259288, "length": 12 }, { "columnId": 1, "section": "ROW_INDEX", - "startOffset": 258786, + "startOffset": 259300, "length": 38 }, { "columnId": 2, "section": "ROW_INDEX", - "startOffset": 258824, + "startOffset": 259338, "length": 41 }, { "columnId": 3, "section": "ROW_INDEX", - "startOffset": 258865, + "startOffset": 259379, "length": 41 }, { "columnId": 3, "section": "BLOOM_FILTER", - "startOffset": 258906, + "startOffset": 259420, "length": 301 }, { "columnId": 1, "section": "DATA", - "startOffset": 259207, + "startOffset": 259721, "length": 4007 }, { "columnId": 2, "section": "DATA", - "startOffset": 263214, - "length": 8007 + "startOffset": 263728, + "length": 8010 }, { "columnId": 3, "section": "PRESENT", - "startOffset": 271221, + "startOffset": 271738, "length": 16 }, { "columnId": 3, "section": "DATA", - "startOffset": 271237, + "startOffset": 271754, "length": 752 }, { "columnId": 3, "section": "LENGTH", - "startOffset": 271989, + "startOffset": 272506, "length": 25 }, { "columnId": 3, "section": "DICTIONARY_DATA", - "startOffset": 272014, + "startOffset": 272531, "length": 133 } ], @@ -1348,7 +1348,7 @@ }] } ], - "fileLength": 272779, + "fileLength": 273300, "paddingLength": 0, "paddingRatio": 0 } diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out index 9c4e83c..bef44a5 100644 --- a/ql/src/test/resources/orc-file-has-null.out +++ b/ql/src/test/resources/orc-file-has-null.out @@ -2,7 +2,7 @@ Structure for TestOrcFile.testHasNull.orc File Version: 0.12 with HIVE_4243 Rows: 20000 Compression: ZLIB -Compression size: 10000 +Compression size: 4096 Type: struct Stripe Statistics: @@ -29,16 +29,16 @@ File Statistics: Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000 Stripes: - Stripe: offset: 3 data: 195 rows: 5000 tail: 64 index: 162 + Stripe: offset: 3 data: 241 rows: 5000 tail: 67 index: 163 Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 63 - Stream: column 2 section ROW_INDEX start: 83 length 82 - Stream: column 1 section DATA start: 165 length 113 - Stream: column 1 section LENGTH start: 278 length 32 - Stream: column 2 section PRESENT start: 310 length 13 - Stream: column 2 section DATA start: 323 length 22 - Stream: column 2 section LENGTH start: 345 length 6 - Stream: column 2 section DICTIONARY_DATA start: 351 length 9 + Stream: column 1 section ROW_INDEX start: 20 length 64 + Stream: column 2 section ROW_INDEX start: 84 length 82 + Stream: column 1 section DATA start: 166 length 159 + Stream: column 1 section LENGTH start: 325 length 32 + Stream: column 2 section PRESENT start: 357 length 13 + Stream: column 2 section DATA start: 370 length 22 + Stream: column 2 section LENGTH start: 392 length 6 + Stream: column 2 section DICTIONARY_DATA start: 398 length 9 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[2] @@ -48,16 +48,16 @@ Stripes: Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488 Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488 Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488 - Stripe: offset: 424 data: 156 rows: 5000 tail: 60 index: 119 - Stream: column 0 section ROW_INDEX start: 424 length 17 - Stream: column 1 section ROW_INDEX start: 441 length 63 - Stream: column 2 section ROW_INDEX start: 504 length 39 - Stream: column 1 section DATA start: 543 length 113 - Stream: column 1 section LENGTH start: 656 length 32 - Stream: column 2 section PRESENT start: 688 length 11 - Stream: column 2 section DATA start: 699 length 0 - Stream: column 2 section LENGTH start: 699 length 0 - Stream: column 2 section DICTIONARY_DATA start: 699 length 0 + Stripe: offset: 474 data: 202 rows: 5000 tail: 64 index: 120 + Stream: column 0 section ROW_INDEX start: 474 length 17 + Stream: column 1 section ROW_INDEX start: 491 length 64 + Stream: column 2 section ROW_INDEX start: 555 length 39 + Stream: column 1 section DATA start: 594 length 159 + Stream: column 1 section LENGTH start: 753 length 32 + Stream: column 2 section PRESENT start: 785 length 11 + Stream: column 2 section DATA start: 796 length 0 + Stream: column 2 section LENGTH start: 796 length 0 + Stream: column 2 section DICTIONARY_DATA start: 796 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] @@ -67,15 +67,15 @@ Stripes: Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 - Stripe: offset: 759 data: 186 rows: 5000 tail: 60 index: 148 - Stream: column 0 section ROW_INDEX start: 759 length 17 - Stream: column 1 section ROW_INDEX start: 776 length 63 - Stream: column 2 section ROW_INDEX start: 839 length 68 - Stream: column 1 section DATA start: 907 length 113 - Stream: column 1 section LENGTH start: 1020 length 32 - Stream: column 2 section DATA start: 1052 length 24 - Stream: column 2 section LENGTH start: 1076 length 6 - Stream: column 2 section DICTIONARY_DATA start: 1082 length 11 + Stripe: offset: 860 data: 232 rows: 5000 tail: 63 index: 149 + Stream: column 0 section ROW_INDEX start: 860 length 17 + Stream: column 1 section ROW_INDEX start: 877 length 64 + Stream: column 2 section ROW_INDEX start: 941 length 68 + Stream: column 1 section DATA start: 1009 length 159 + Stream: column 1 section LENGTH start: 1168 length 32 + Stream: column 2 section DATA start: 1200 length 24 + Stream: column 2 section LENGTH start: 1224 length 6 + Stream: column 2 section DICTIONARY_DATA start: 1230 length 11 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[1] @@ -85,16 +85,16 @@ Stripes: Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464 Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440 Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416 - Stripe: offset: 1153 data: 156 rows: 5000 tail: 60 index: 119 - Stream: column 0 section ROW_INDEX start: 1153 length 17 - Stream: column 1 section ROW_INDEX start: 1170 length 63 - Stream: column 2 section ROW_INDEX start: 1233 length 39 - Stream: column 1 section DATA start: 1272 length 113 - Stream: column 1 section LENGTH start: 1385 length 32 - Stream: column 2 section PRESENT start: 1417 length 11 - Stream: column 2 section DATA start: 1428 length 0 - Stream: column 2 section LENGTH start: 1428 length 0 - Stream: column 2 section DICTIONARY_DATA start: 1428 length 0 + Stripe: offset: 1304 data: 202 rows: 5000 tail: 64 index: 120 + Stream: column 0 section ROW_INDEX start: 1304 length 17 + Stream: column 1 section ROW_INDEX start: 1321 length 64 + Stream: column 2 section ROW_INDEX start: 1385 length 39 + Stream: column 1 section DATA start: 1424 length 159 + Stream: column 1 section LENGTH start: 1583 length 32 + Stream: column 2 section PRESENT start: 1615 length 11 + Stream: column 2 section DATA start: 1626 length 0 + Stream: column 2 section LENGTH start: 1626 length 0 + Stream: column 2 section DICTIONARY_DATA start: 1626 length 0 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DICTIONARY_V2[0] @@ -105,6 +105,6 @@ Stripes: Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 -File length: 1736 bytes +File length: 1940 bytes Padding length: 0 bytes Padding ratio: 0%