diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index db2ad3f..11cd0cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -6235,6 +6235,9 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) // Check constraints on acid tables. This includes // * no insert overwrites // * no use of vectorization + // * turns off reduce deduplication optimization, as that sometimes breaks acid + // This method assumes you have already decided that this is an Acid write. Don't call it if + // that isn't true. private void checkAcidConstraints(QB qb, TableDesc tableDesc) throws SemanticException { String tableName = tableDesc.getTableName(); if (!qb.getParseInfo().isInsertIntoTable(tableName)) { @@ -6245,6 +6248,9 @@ private void checkAcidConstraints(QB qb, TableDesc tableDesc) throws SemanticExc LOG.info("Turning off vectorization for acid write operation"); conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false); } + LOG.info("Modifying config values for ACID write"); + conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, false); + conf.setBoolVar(ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true); } /** diff --git ql/src/test/queries/clientpositive/acid_vectorization.q ql/src/test/queries/clientpositive/acid_vectorization.q index 9d91d88..804144a 100644 --- ql/src/test/queries/clientpositive/acid_vectorization.q +++ ql/src/test/queries/clientpositive/acid_vectorization.q @@ -4,7 +4,6 @@ set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.vectorized.execution.enabled=true; -set hive.mapred.supports.subdirectories=true; CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC; insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10; diff --git ql/src/test/queries/clientpositive/delete_all_non_partitioned.q ql/src/test/queries/clientpositive/delete_all_non_partitioned.q index 3c0bf62..80a5991 100644 --- ql/src/test/queries/clientpositive/delete_all_non_partitioned.q +++ ql/src/test/queries/clientpositive/delete_all_non_partitioned.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.exec.reducers.max = 1; create table acid_danp(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc; diff --git ql/src/test/queries/clientpositive/delete_all_partitioned.q ql/src/test/queries/clientpositive/delete_all_partitioned.q index c271896..b848319 100644 --- ql/src/test/queries/clientpositive/delete_all_partitioned.q +++ ql/src/test/queries/clientpositive/delete_all_partitioned.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table acid_dap(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc; diff --git ql/src/test/queries/clientpositive/delete_where_partitioned.q ql/src/test/queries/clientpositive/delete_where_partitioned.q index 04dc03e..cce89f4 100644 --- ql/src/test/queries/clientpositive/delete_where_partitioned.q +++ ql/src/test/queries/clientpositive/delete_where_partitioned.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table acid_dwp(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc; diff --git ql/src/test/queries/clientpositive/delete_whole_partition.q ql/src/test/queries/clientpositive/delete_whole_partition.q index 4548cb6..2cb3e74 100644 --- ql/src/test/queries/clientpositive/delete_whole_partition.q +++ ql/src/test/queries/clientpositive/delete_whole_partition.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table acid_dwhp(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc; diff --git ql/src/test/queries/clientpositive/insert_update_delete.q ql/src/test/queries/clientpositive/insert_update_delete.q index bbae4e9..34350df 100644 --- ql/src/test/queries/clientpositive/insert_update_delete.q +++ ql/src/test/queries/clientpositive/insert_update_delete.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table acid_iud(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc; diff --git ql/src/test/queries/clientpositive/insert_values_dynamic_partitioned.q ql/src/test/queries/clientpositive/insert_values_dynamic_partitioned.q index 2602189..bde2e71 100644 --- ql/src/test/queries/clientpositive/insert_values_dynamic_partitioned.q +++ ql/src/test/queries/clientpositive/insert_values_dynamic_partitioned.q @@ -3,7 +3,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table ivdp(i int, de decimal(5,2), diff --git ql/src/test/queries/clientpositive/insert_values_partitioned.q ql/src/test/queries/clientpositive/insert_values_partitioned.q index 11f0d4e..23d6d4c 100644 --- ql/src/test/queries/clientpositive/insert_values_partitioned.q +++ ql/src/test/queries/clientpositive/insert_values_partitioned.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.exec.dynamic.partition.mode=nonstrict; create table acid_ivp(ti tinyint, si smallint, diff --git ql/src/test/queries/clientpositive/update_after_multiple_inserts.q ql/src/test/queries/clientpositive/update_after_multiple_inserts.q index f7aab4d..04d2df5 100644 --- ql/src/test/queries/clientpositive/update_after_multiple_inserts.q +++ ql/src/test/queries/clientpositive/update_after_multiple_inserts.q @@ -3,7 +3,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table acid_uami(i int, de decimal(5,2), diff --git ql/src/test/queries/clientpositive/update_all_partitioned.q ql/src/test/queries/clientpositive/update_all_partitioned.q index 9a5870a..0b6c767 100644 --- ql/src/test/queries/clientpositive/update_all_partitioned.q +++ ql/src/test/queries/clientpositive/update_all_partitioned.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table acid_uap(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc; diff --git ql/src/test/queries/clientpositive/update_where_partitioned.q ql/src/test/queries/clientpositive/update_where_partitioned.q index eec745a..c5b6d04 100644 --- ql/src/test/queries/clientpositive/update_where_partitioned.q +++ ql/src/test/queries/clientpositive/update_where_partitioned.q @@ -2,7 +2,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.enforce.bucketing=true; -set hive.mapred.supports.subdirectories=true; create table acid_uwp(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc;