Details
-
Task
-
Status: Closed
-
Major
-
Resolution: Fixed
-
None
-
None
Description
#Repro steps:
#1./ set hive.compactor.max.num.delta to 5 on HMS
#2./ Set up the table
set hive.merge.cardinality.check=false;
create table test (k int);
ALTER TABLE test SET TBLPROPERTIES ('NO_AUTO_COMPACTION'='true');
insert into test values (1);
alter table test compact 'major' and wait;
dfs -ls '/warehouse/tablespace/managed/hive/test';
# drwxrwx---+ - hive hive 0 2021-08-09 12:26 /warehouse/tablespace/managed/hive/test/base_0000008_v0000416
select * from test;
# k=1
#run 3 times so there's enough delta dirs, i.e. 6 (should just increase k by 1)
#basically just removes the row and adds a new row with k+1 value
MERGE INTO test AS T USING (select * from test union all select k+1 from test) AS S ON T.k=s.k WHEN MATCHED THEN DELETE WHEN not MATCHED THEN INSERT values (s.k);
select * from test;
#k=4
dfs -ls '/warehouse/tablespace/managed/hive/test';
#drwxrwx---+ - hive hive 0 2021-08-09 12:26 /warehouse/tablespace/managed/hive/test/base_0000008_v0000416
#drwxrwx---+ - hive hive 0 2021-08-09 12:28 /warehouse/tablespace/managed/hive/test/delete_delta_0000009_0000009_0001
#drwxrwx---+ - hive hive 0 2021-08-09 12:29 /warehouse/tablespace/managed/hive/test/delete_delta_0000010_0000010_0001
#drwxrwx---+ - hive hive 0 2021-08-09 12:29 /warehouse/tablespace/managed/hive/test/delete_delta_0000011_0000011_0001
#drwxrwx---+ - hive hive 0 2021-08-09 12:28 /warehouse/tablespace/managed/hive/test/delta_0000009_0000009_0003
#drwxrwx---+ - hive hive 0 2021-08-09 12:29 /warehouse/tablespace/managed/hive/test/delta_0000010_0000010_0003
#drwxrwx---+ - hive hive 0 2021-08-09 12:29 /warehouse/tablespace/managed/hive/test/delta_0000011_0000011_0003
alter table test compact 'major' and wait;
select * from test;
#result is empty
dfs -ls '/warehouse/tablespace/managed/hive/test';
#2 drwxrwx---+ - hive hive 0 2021-08-09 12:31 /warehouse/tablespace/managed/hive/test/base_0000011_v0000428
Some logs from the above example:
2021-08-09 12:30:37,532 WARN org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: 6 delta files found for default.test located at hdfs://nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site:8020/warehouse/tablespace/managed/hive/test! This is likely a sign of misconfiguration, especially if this message repeats. Check that compaction is running properly. Check for any runaway/mis-configured process writing to ACID tables, especially using Streaming Ingest API.
2021-08-09 12:30:37,533 INFO org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitting MINOR compaction job 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test_0' to default queue. (current delta dirs count=5, obsolete delta dirs count=-1. TxnIdRange[9,11]
2021-08-09 12:30:38,003 INFO org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitted compaction job 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test_0' with jobID=job_1628497133224_0051 compaction ID=23
#From app logs of the minor compaction, note that delta_0000011_0000011_0003 is missing from the list
2021-08-09 12:30:47,399 INFO [main] org.apache.hadoop.mapred.MapTask: Processing split: CompactorInputSplit{base: null, bucket: 0, length: 3231, deltas: [delete_delta_0000009_0000009_0001, delta_0000009_0000009_0003, delete_delta_0000010_0000010_0001, delta_0000010_0000010_0003, delete_delta_0000011_0000011_0001]}
2021-08-09 12:30:53,061 INFO org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitting MAJOR compaction job 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test' to default queue. (current delta dirs count=2, obsolete delta dirs count=6. TxnIdRange[9,11]
2021-08-09 12:30:53,501 INFO org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitted compaction job 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test' with jobID=job_1628497133224_0052 compaction ID=23
2021-08-09 12:31:03,493 INFO [main] org.apache.hadoop.mapred.MapTask: Processing split: CompactorInputSplit{base: hdfs://nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site:8020/warehouse/tablespace/managed/hive/test/base_0000008_v0000416, bucket: 0, length: 1697, deltas: [delete_delta_0000009_0000011_v0000428, delta_0000009_0000011_v0000428]}
Attachments
Issue Links
- links to