diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 392f7ce..db2ad3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -47,7 +47,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; @@ -5866,7 +5865,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) if (!isNonNativeTable) { AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass()); if (acidOp != AcidUtils.Operation.NOT_ACID) { - checkIfAcidAndOverwriting(qb, table_desc); + checkAcidConstraints(qb, table_desc); } ltd = new LoadTableDesc(queryTmpdir,table_desc, dpCtx, acidOp); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -5973,7 +5972,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) dest_part.isStoredAsSubDirectories(), conf); AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass()); if (acidOp != AcidUtils.Operation.NOT_ACID) { - checkIfAcidAndOverwriting(qb, table_desc); + checkAcidConstraints(qb, table_desc); } ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -6233,15 +6232,19 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) return output; } - // Check if we are overwriting any tables. If so, throw an exception as that is not allowed - // when using an Acid compliant txn manager and operating on an acid table. 
- private void checkIfAcidAndOverwriting(QB qb, TableDesc tableDesc) throws SemanticException { + // Check constraints on acid tables. This includes + // * no insert overwrites (a SemanticException carrying NO_INSERT_OVERWRITE_WITH_ACID is thrown) + // * no use of vectorization (if enabled, it is switched off in conf instead of failing; NOTE(review): the flag is not restored in this method - confirm conf is scoped to the query) + private void checkAcidConstraints(QB qb, TableDesc tableDesc) throws SemanticException { String tableName = tableDesc.getTableName(); if (!qb.getParseInfo().isInsertIntoTable(tableName)) { LOG.debug("Couldn't find table " + tableName + " in insertIntoTable"); throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg()); } - + if (conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) { + LOG.info("Turning off vectorization for acid write operation"); + conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false); + } } /** diff --git ql/src/test/queries/clientpositive/acid_vectorization.q ql/src/test/queries/clientpositive/acid_vectorization.q new file mode 100644 index 0000000..9d91d88 --- /dev/null +++ ql/src/test/queries/clientpositive/acid_vectorization.q @@ -0,0 +1,16 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.enforce.bucketing=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=true; +set hive.mapred.supports.subdirectories=true; + +CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC; +insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10; +set hive.vectorized.execution.enabled=true; +insert into table acid_vectorized values (1, 'bar'); +set hive.vectorized.execution.enabled=true; +update acid_vectorized set b = 'foo' where b = 'bar'; +set hive.vectorized.execution.enabled=true; +delete from acid_vectorized where b = 'foo'; diff --git ql/src/test/results/clientpositive/acid_vectorization.q.out ql/src/test/results/clientpositive/acid_vectorization.q.out 
new file mode 100644 index 0000000..4a9d19f --- /dev/null +++ ql/src/test/results/clientpositive/acid_vectorization.q.out @@ -0,0 +1,44 @@ +PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert into table acid_vectorized values (1, 'bar') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized values (1, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: update 
acid_vectorized set b = 'foo' where b = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: update acid_vectorized set b = 'foo' where b = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: delete from acid_vectorized where b = 'foo' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: delete from acid_vectorized where b = 'foo' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +POSTHOOK: Output: default@acid_vectorized