diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index cd47a63e14..4d71eb4f4d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -1127,12 +1127,19 @@ else if (prev != null && next.maxWriteId == prev.maxWriteId } } - if(bestBase.oldestBase != null && bestBase.status == null) { + if(bestBase.oldestBase != null && bestBase.status == null && + MetaDataFile.isCompacted(bestBase.oldestBase, fs)) { /** * If here, it means there was a base_x (> 1 perhaps) but none were suitable for given * {@link writeIdList}. Note that 'original' files are logically a base_Long.MIN_VALUE and thus * cannot have any data for an open txn. We could check {@link deltas} has files to cover - * [1,n] w/o gaps but this would almost never happen...*/ + * [1,n] w/o gaps but this would almost never happen... + * + * We only throw for base_x produced by Compactor since that base erases all history and + * cannot be used for a client that has a snapshot in which something inside this base is + * open. (Nor can we ignore this base, of course.) But a base_x which is a result of IOW + * contains all history, so we treat it just like a delta wrt visibility. Imagine an IOW + * which aborts: it creates a base_x, which can and should just be ignored.*/ long[] exceptions = writeIdList.getInvalidWriteIds(); String minOpenWriteId = exceptions != null && exceptions.length > 0 ? 
Long.toString(exceptions[0]) : "x"; diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 536281d555..9a4322d74b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -108,7 +108,7 @@ public void testInsertOverwrite() throws Exception { Assert.assertEquals(1, rs.size()); Assert.assertEquals("1", rs.get(0)); hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true); - runStatementOnDriver("insert into " + Table.ACIDTBL + " values(3,2)"); + runStatementOnDriver("insert overwrite table " + Table.ACIDTBL + " values(3,2)"); hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, false); runStatementOnDriver("insert into " + Table.ACIDTBL + " values(5,6)"); rs = runStatementOnDriver("select a from " + Table.ACIDTBL + " order by a"); diff --git ql/src/test/queries/clientpositive/mm_insert_overwrite_aborted.q ql/src/test/queries/clientpositive/mm_insert_overwrite_aborted.q new file mode 100644 index 0000000000..938e1f462a --- /dev/null +++ ql/src/test/queries/clientpositive/mm_insert_overwrite_aborted.q @@ -0,0 +1,20 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.exec.dynamic.partition=true; +set hive.vectorized.execution.enabled=true; +set hive.create.as.insert.only=true; + +drop table if exists studentparttab30k; +create table studentparttab30k (name string) row format delimited fields terminated by '\\t' stored as textfile; +insert into studentparttab30k values('a'); + +drop table if exists multi_insert_1; +create table multi_insert_1 (name string) row format delimited fields terminated by '\\t' stored as textfile; + +set hive.test.rollbacktxn=true; + +insert overwrite table multi_insert_1 select name FROM studentparttab30k; + +set hive.test.rollbacktxn=false; +select * 
from multi_insert_1; diff --git ql/src/test/results/clientpositive/mm_insert_overwrite_aborted.q.out ql/src/test/results/clientpositive/mm_insert_overwrite_aborted.q.out new file mode 100644 index 0000000000..d645b86b83 --- /dev/null +++ ql/src/test/results/clientpositive/mm_insert_overwrite_aborted.q.out @@ -0,0 +1,50 @@ +PREHOOK: query: drop table if exists studentparttab30k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists studentparttab30k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table studentparttab30k (name string) row format delimited fields terminated by '\\t' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studentparttab30k +POSTHOOK: query: create table studentparttab30k (name string) row format delimited fields terminated by '\\t' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studentparttab30k +PREHOOK: query: insert into studentparttab30k values('a') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@studentparttab30k +POSTHOOK: query: insert into studentparttab30k values('a') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@studentparttab30k +POSTHOOK: Lineage: studentparttab30k.name SCRIPT [] +PREHOOK: query: drop table if exists multi_insert_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists multi_insert_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table multi_insert_1 (name string) row format delimited fields terminated by '\\t' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@multi_insert_1 +POSTHOOK: query: create table multi_insert_1 (name string) row format delimited fields terminated by '\\t' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@multi_insert_1 +PREHOOK: 
query: insert overwrite table multi_insert_1 select name FROM studentparttab30k +PREHOOK: type: QUERY +PREHOOK: Input: default@studentparttab30k +PREHOOK: Output: default@multi_insert_1 +POSTHOOK: query: insert overwrite table multi_insert_1 select name FROM studentparttab30k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studentparttab30k +POSTHOOK: Output: default@multi_insert_1 +POSTHOOK: Lineage: multi_insert_1.name SIMPLE [(studentparttab30k)studentparttab30k.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: select * from multi_insert_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@multi_insert_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from multi_insert_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi_insert_1 +#### A masked pattern was here ####