diff --git a/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q b/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q new file mode 100644 index 0000000000..73bcef8866 --- /dev/null +++ b/ql/src/test/queries/clientpositive/acid_bloom_filter_orc_file_dump.q @@ -0,0 +1,31 @@ +SET hive.vectorized.execution.enabled=FALSE; +SET hive.mapred.mode=nonstrict; + +SET hive.support.concurrency=TRUE; +SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE bloomTest( + msisdn STRING, + imsi VARCHAR(20), + imei BIGINT, + cell_id BIGINT) + +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' + +TBLPROPERTIES ( + 'bucketing_version'='2', + 'orc.bloom.filter.columns'='msisdn,cell_id,imsi', + 'orc.bloom.filter.fpp'='0.02', + 'transactional'='true' +); + +INSERT INTO bloomTest VALUES ('12345', '12345', 12345, 12345); +INSERT INTO bloomTest VALUES ('2345', '2345', 2345, 2345); + +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump; +SELECT * FROM bloomTest LIMIT 1; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/acid_bloom_filter_orc_file_dump.q.out b/ql/src/test/results/clientpositive/acid_bloom_filter_orc_file_dump.q.out new file mode 100644 index 0000000000..af4b5e4b76 --- /dev/null +++ b/ql/src/test/results/clientpositive/acid_bloom_filter_orc_file_dump.q.out @@ -0,0 +1,305 @@ +PREHOOK: query: CREATE TABLE bloomTest( + msisdn STRING, + imsi VARCHAR(20), + imei BIGINT, + cell_id BIGINT) + +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' + +TBLPROPERTIES ( + 'bucketing_version'='2', + 'orc.bloom.filter.columns'='msisdn,cell_id,imsi', + 'orc.bloom.filter.fpp'='0.02', + 'transactional'='true' +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bloomTest +POSTHOOK: query: CREATE TABLE bloomTest( + msisdn STRING, + imsi VARCHAR(20), + imei BIGINT, + cell_id BIGINT) + +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' + +TBLPROPERTIES ( + 'bucketing_version'='2', + 'orc.bloom.filter.columns'='msisdn,cell_id,imsi', + 'orc.bloom.filter.fpp'='0.02', + 'transactional'='true' +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bloomTest +PREHOOK: query: INSERT INTO bloomTest VALUES ('12345', '12345', 12345, 12345) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@bloomtest +POSTHOOK: query: INSERT INTO bloomTest VALUES ('12345', '12345', 12345, 12345) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@bloomtest +POSTHOOK: Lineage: bloomtest.cell_id SCRIPT [] +POSTHOOK: Lineage: bloomtest.imei SCRIPT [] +POSTHOOK: Lineage: bloomtest.imsi SCRIPT [] +POSTHOOK: Lineage: bloomtest.msisdn SCRIPT [] +PREHOOK: query: INSERT INTO bloomTest VALUES ('2345', '2345', 2345, 2345) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@bloomtest +POSTHOOK: query: INSERT INTO bloomTest VALUES ('2345', '2345', 2345, 2345) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@bloomtest +POSTHOOK: Lineage: bloomtest.cell_id SCRIPT [] +POSTHOOK: Lineage: bloomtest.imei SCRIPT [] +POSTHOOK: Lineage: bloomtest.imsi SCRIPT [] +POSTHOOK: Lineage: bloomtest.msisdn SCRIPT [] +PREHOOK: query: SELECT * FROM bloomTest LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@bloomtest +#### A masked pattern was here #### +-- BEGIN ORC FILE DUMP -- +#### A masked pattern was here #### +File Version: 0.12 with ORC_135 +Rows: 1 +Compression: ZLIB +Compression size: 32768 +Type: struct> + +Stripe Statistics: + Stripe 1: + Column 0: count: 1 hasNull: false + Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 2: count: 1 hasNull: false bytesOnDisk: 6 min: 2 max: 2 sum: 2 + Column 3: count: 1 hasNull: false bytesOnDisk: 9 min: 536870912 max: 536870912 sum: 536870912 + Column 4: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 5: count: 1 hasNull: false bytesOnDisk: 6 min: 2 max: 2 sum: 2 + Column 6: count: 1 hasNull: false + Column 7: count: 1 hasNull: false bytesOnDisk: 13 min: 2345 max: 2345 sum: 4 + Column 8: count: 1 hasNull: false bytesOnDisk: 13 min: 2345 max: 2345 sum: 4 + Column 9: count: 1 hasNull: false bytesOnDisk: 7 min: 2345 max: 2345 sum: 2345 + Column 10: count: 1 hasNull: false bytesOnDisk: 7 min: 2345 max: 2345 sum: 2345 + +File Statistics: + Column 0: count: 1 hasNull: false + Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 2: count: 1 hasNull: false bytesOnDisk: 6 min: 2 max: 2 sum: 2 + Column 3: count: 1 hasNull: false bytesOnDisk: 9 min: 536870912 max: 536870912 sum: 536870912 + Column 4: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 5: count: 1 hasNull: false bytesOnDisk: 6 min: 2 max: 2 sum: 2 + Column 6: count: 1 hasNull: false + Column 7: count: 1 hasNull: false bytesOnDisk: 13 min: 2345 max: 2345 sum: 4 + Column 8: count: 1 hasNull: false bytesOnDisk: 13 min: 2345 max: 2345 sum: 4 + Column 9: count: 1 hasNull: false bytesOnDisk: 7 min: 2345 max: 2345 sum: 2345 + Column 10: count: 1 hasNull: false bytesOnDisk: 7 min: 2345 max: 2345 sum: 2345 + +Stripes: + Stripe: offset: 3 data: 73 rows: 1 tail: 103 index: 595 + Stream: column 0 section ROW_INDEX start: 3 length 11 + Stream: column 1 section ROW_INDEX start: 14 length 24 + Stream: column 2 section ROW_INDEX start: 38 length 24 + Stream: column 3 section ROW_INDEX start: 62 length 29 + Stream: column 4 section ROW_INDEX start: 91 length 24 + Stream: column 5 section ROW_INDEX start: 115 length 24 + Stream: column 6 section ROW_INDEX start: 139 length 11 + Stream: column 7 section ROW_INDEX start: 150 length 30 + Stream: column 7 section BLOOM_FILTER_UTF8 start: 180 length 112 + Stream: column 8 section ROW_INDEX start: 292 length 30 + Stream: column 8 section BLOOM_FILTER_UTF8 start: 322 length 112 + Stream: column 9 section ROW_INDEX start: 434 length 27 + Stream: column 10 section ROW_INDEX start: 461 length 27 + Stream: column 10 section BLOOM_FILTER_UTF8 start: 488 length 110 + Stream: column 1 section DATA start: 598 length 6 + Stream: column 2 section DATA start: 604 length 6 + Stream: column 3 section DATA start: 610 length 9 + Stream: column 4 section DATA start: 619 length 6 + Stream: column 5 section DATA start: 625 length 6 + Stream: column 7 section DATA start: 631 length 7 + Stream: column 7 section LENGTH start: 638 length 6 + Stream: column 8 section DATA start: 644 length 7 + Stream: column 8 section LENGTH start: 651 length 6 + Stream: column 9 section DATA start: 657 length 7 + Stream: column 10 section DATA start: 664 length 7 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DIRECT_V2 + Encoding column 4: DIRECT_V2 + Encoding column 5: DIRECT_V2 + Encoding column 6: DIRECT + Encoding column 7: DIRECT_V2 + Encoding column 8: DIRECT_V2 + Encoding column 9: DIRECT_V2 + Encoding column 10: DIRECT_V2 + Row group indices for column 0: + Entry 0: count: 1 hasNull: false positions: + Row group indices for column 1: + Entry 0: count: 1 hasNull: false min: 0 max: 0 sum: 0 positions: 0,0,0 + Row group indices for column 2: + Entry 0: count: 1 hasNull: false min: 2 max: 2 sum: 2 positions: 0,0,0 + Row group indices for column 3: + Entry 0: count: 1 hasNull: false min: 536870912 max: 536870912 sum: 536870912 positions: 0,0,0 + Row group indices for column 4: + Entry 0: count: 1 hasNull: false min: 0 max: 0 sum: 0 positions: 0,0,0 + Row group indices for column 5: + Entry 0: count: 1 hasNull: false min: 2 max: 2 sum: 2 positions: 0,0,0 + Row group indices for column 6: + Entry 0: count: 1 hasNull: false positions: + Row group indices for column 7: + Entry 0: count: 1 hasNull: false min: 2345 max: 2345 sum: 4 positions: 0,0,0,0,0 + Bloom filters for column 7: + Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Row group indices for column 8: + Entry 0: count: 1 hasNull: false min: 2345 max: 2345 sum: 4 positions: 0,0,0,0,0 + Bloom filters for column 8: + Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Row group indices for column 9: + Entry 0: count: 1 hasNull: false min: 2345 max: 2345 sum: 2345 positions: 0,0,0 + Row group indices for column 10: + Entry 0: count: 1 hasNull: false min: 2345 max: 2345 sum: 2345 positions: 0,0,0 + Bloom filters for column 10: + Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + +File length: 1203 bytes +Padding length: 0 bytes +Padding ratio: 0% + +User Metadata: + hive.acid.key.index=2,536870912,0; + hive.acid.stats=1,0,0 + hive.acid.version=2 +________________________________________________________________________________________________________________________ + +-- END ORC FILE DUMP -- +-- BEGIN ORC FILE DUMP -- +#### A masked pattern was here #### +File Version: 0.12 with ORC_135 +Rows: 1 +Compression: ZLIB +Compression size: 32768 +Type: struct> + +Stripe Statistics: + Stripe 1: + Column 0: count: 1 hasNull: false + Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 2: count: 1 hasNull: false bytesOnDisk: 6 min: 1 max: 1 sum: 1 + Column 3: count: 1 hasNull: false bytesOnDisk: 9 min: 536870912 max: 536870912 sum: 536870912 + Column 4: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 5: count: 1 hasNull: false bytesOnDisk: 6 min: 1 max: 1 sum: 1 + Column 6: count: 1 hasNull: false + Column 7: count: 1 hasNull: false bytesOnDisk: 14 min: 12345 max: 12345 sum: 5 + Column 8: count: 1 hasNull: false bytesOnDisk: 14 min: 12345 max: 12345 sum: 5 + Column 9: count: 1 hasNull: false bytesOnDisk: 7 min: 12345 max: 12345 sum: 12345 + Column 10: count: 1 hasNull: false bytesOnDisk: 7 min: 12345 max: 12345 sum: 12345 + +File Statistics: + Column 0: count: 1 hasNull: false + Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 2: count: 1 hasNull: false bytesOnDisk: 6 min: 1 max: 1 sum: 1 + Column 3: count: 1 hasNull: false bytesOnDisk: 9 min: 536870912 max: 536870912 sum: 536870912 + Column 4: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0 + Column 5: count: 1 hasNull: false bytesOnDisk: 6 min: 1 max: 1 sum: 1 + Column 6: count: 1 hasNull: false + Column 7: count: 1 hasNull: false bytesOnDisk: 14 min: 12345 max: 12345 sum: 5 + Column 8: count: 1 hasNull: false bytesOnDisk: 14 min: 12345 max: 12345 sum: 5 + Column 9: count: 1 hasNull: false bytesOnDisk: 7 min: 12345 max: 12345 sum: 12345 + Column 10: count: 1 hasNull: false bytesOnDisk: 7 min: 12345 max: 12345 sum: 12345 + +Stripes: + Stripe: offset: 3 data: 75 rows: 1 tail: 100 index: 597 + Stream: column 0 section ROW_INDEX start: 3 length 11 + Stream: column 1 section ROW_INDEX start: 14 length 24 + Stream: column 2 section ROW_INDEX start: 38 length 24 + Stream: column 3 section ROW_INDEX start: 62 length 29 + Stream: column 4 section ROW_INDEX start: 91 length 24 + Stream: column 5 section ROW_INDEX start: 115 length 24 + Stream: column 6 section ROW_INDEX start: 139 length 11 + Stream: column 7 section ROW_INDEX start: 150 length 31 + Stream: column 7 section BLOOM_FILTER_UTF8 start: 181 length 111 + Stream: column 8 section ROW_INDEX start: 292 length 31 + Stream: column 8 section BLOOM_FILTER_UTF8 start: 323 length 111 + Stream: column 9 section ROW_INDEX start: 434 length 29 + Stream: column 10 section ROW_INDEX start: 463 length 29 + Stream: column 10 section BLOOM_FILTER_UTF8 start: 492 length 108 + Stream: column 1 section DATA start: 600 length 6 + Stream: column 2 section DATA start: 606 length 6 + Stream: column 3 section DATA start: 612 length 9 + Stream: column 4 section DATA start: 621 length 6 + Stream: column 5 section DATA start: 627 length 6 + Stream: column 7 section DATA start: 633 length 8 + Stream: column 7 section LENGTH start: 641 length 6 + Stream: column 8 section DATA start: 647 length 8 + Stream: column 8 section LENGTH start: 655 length 6 + Stream: column 9 section DATA start: 661 length 7 + Stream: column 10 section DATA start: 668 length 7 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DIRECT_V2 + Encoding column 4: DIRECT_V2 + Encoding column 5: DIRECT_V2 + Encoding column 6: DIRECT + Encoding column 7: DIRECT_V2 + Encoding column 8: DIRECT_V2 + Encoding column 9: DIRECT_V2 + Encoding column 10: DIRECT_V2 + Row group indices for column 0: + Entry 0: count: 1 hasNull: false positions: + Row group indices for column 1: + Entry 0: count: 1 hasNull: false min: 0 max: 0 sum: 0 positions: 0,0,0 + Row group indices for column 2: + Entry 0: count: 1 hasNull: false min: 1 max: 1 sum: 1 positions: 0,0,0 + Row group indices for column 3: + Entry 0: count: 1 hasNull: false min: 536870912 max: 536870912 sum: 536870912 positions: 0,0,0 + Row group indices for column 4: + Entry 0: count: 1 hasNull: false min: 0 max: 0 sum: 0 positions: 0,0,0 + Row group indices for column 5: + Entry 0: count: 1 hasNull: false min: 1 max: 1 sum: 1 positions: 0,0,0 + Row group indices for column 6: + Entry 0: count: 1 hasNull: false positions: + Row group indices for column 7: + Entry 0: count: 1 hasNull: false min: 12345 max: 12345 sum: 5 positions: 0,0,0,0,0 + Bloom filters for column 7: + Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Row group indices for column 8: + Entry 0: count: 1 hasNull: false min: 12345 max: 12345 sum: 5 positions: 0,0,0,0,0 + Bloom filters for column 8: + Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Row group indices for column 9: + Entry 0: count: 1 hasNull: false min: 12345 max: 12345 sum: 12345 positions: 0,0,0 + Row group indices for column 10: + Entry 0: count: 1 hasNull: false min: 12345 max: 12345 sum: 12345 positions: 0,0,0 + Bloom filters for column 10: + Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25 + +File length: 1211 bytes +Padding length: 0 bytes +Padding ratio: 0% + +User Metadata: + hive.acid.key.index=1,536870912,0; + hive.acid.stats=1,0,0 + hive.acid.version=2 +________________________________________________________________________________________________________________________ + +-- END ORC FILE DUMP -- +12345 12345 12345 12345