diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 441b278..4af4c58 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -185,6 +185,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   update_where_non_partitioned.q,\
   update_where_partitioned.q,\
   update_two_cols.q,\
+  vector_acid3.q,\
   vector_aggregate_9.q,\
   vector_between_in.q,\
   vector_binary_join_groupby.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b02374e..074ad64 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6689,10 +6689,6 @@ private void checkAcidConstraints(QB qb, TableDesc tableDesc,
       LOG.debug("Couldn't find table " + tableName + " in insertIntoTable");
       throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg());
     }
-    if (conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) {
-      LOG.info("Turning off vectorization for acid write operation");
-      conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false);
-    }
     LOG.info("Modifying config values for ACID write");
     conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true);
     conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1);
diff --git ql/src/test/queries/clientpositive/vector_acid3.q ql/src/test/queries/clientpositive/vector_acid3.q
new file mode 100644
index 0000000..169b19f
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_acid3.q
@@ -0,0 +1,23 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.dynamic.partition=true;
+set hive.vectorized.execution.enabled=true;
+
+drop table if exists testacid1;
+
+create table testacid1(id int) clustered by (id) into 2 buckets stored as orc tblproperties("transactional"="true");
+
+explain
+insert into table testacid1 values (1),(2),(3),(4);
+
+insert into table testacid1 values (1),(2),(3),(4);
+
+set hive.compute.query.using.stats=false;
+
+set hive.vectorized.execution.enabled;
+
+explain
+select count(1) from testacid1;
+
+select count(1) from testacid1;
diff --git ql/src/test/results/clientpositive/tez/vector_acid3.q.out ql/src/test/results/clientpositive/tez/vector_acid3.q.out
new file mode 100644
index 0000000..7878894
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_acid3.q.out
@@ -0,0 +1,98 @@
+PREHOOK: query: drop table if exists testacid1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testacid1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testacid1(id int) clustered by (id) into 2 buckets stored as orc tblproperties("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testacid1
+POSTHOOK: query: create table testacid1(id int) clustered by (id) into 2 buckets stored as orc tblproperties("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testacid1
+PREHOOK: query: explain
+insert into table testacid1 values (1),(2),(3),(4)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table testacid1 values (1),(2),(3),(4)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Stage-3
+   Stats-Aggr Operator
+      Stage-0
+         Move Operator
+            table:{"serde:":"org.apache.hadoop.hive.ql.io.orc.OrcSerde","name:":"default.testacid1","input format:":"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat","output format:":"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"}
+            Stage-2
+               Dependency Collection{}
+                  Stage-1
+                     Map 1
+                     File Output Operator [FS_3]
+                        compressed:false
+                        Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        table:{"serde:":"org.apache.hadoop.hive.ql.io.orc.OrcSerde","name:":"default.testacid1","input format:":"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat","output format:":"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"}
+                        Select Operator [SEL_1]
+                           outputColumnNames:["_col0"]
+                           Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                           TableScan [TS_0]
+                              alias:values__tmp__table__1
+                              Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+
+PREHOOK: query: insert into table testacid1 values (1),(2),(3),(4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@testacid1
+POSTHOOK: query: insert into table testacid1 values (1),(2),(3),(4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@testacid1
+POSTHOOK: Lineage: testacid1.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+hive.vectorized.execution.enabled=true
+PREHOOK: query: explain
+select count(1) from testacid1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1) from testacid1
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+   Fetch Operator
+      limit:-1
+      Stage-1
+         Reducer 2
+         File Output Operator [FS_7]
+            compressed:false
+            Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+            Group By Operator [OP_10]
+            |  aggregations:["count(VALUE._col0)"]
+            |  outputColumnNames:["_col0"]
+            |  Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            |<-Map 1 [SIMPLE_EDGE]
+               Reduce Output Operator [RS_4]
+                  sort order:
+                  Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions:_col0 (type: bigint)
+                  Group By Operator [OP_9]
+                     aggregations:["count(1)"]
+                     outputColumnNames:["_col0"]
+                     Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                     Select Operator [OP_8]
+                        Statistics:Num rows: 4 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE
+                        TableScan [TS_0]
+                           alias:testacid1
+                           Statistics:Num rows: 4 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE
+
+PREHOOK: query: select count(1) from testacid1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testacid1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from testacid1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testacid1
+#### A masked pattern was here ####
+4
diff --git ql/src/test/results/clientpositive/vector_acid3.q.out ql/src/test/results/clientpositive/vector_acid3.q.out
new file mode 100644
index 0000000..72f3def
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_acid3.q.out
@@ -0,0 +1,124 @@
+PREHOOK: query: drop table if exists testacid1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testacid1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testacid1(id int) clustered by (id) into 2 buckets stored as orc tblproperties("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testacid1
+POSTHOOK: query: create table testacid1(id int) clustered by (id) into 2 buckets stored as orc tblproperties("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testacid1
+PREHOOK: query: explain
+insert into table testacid1 values (1),(2),(3),(4)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table testacid1 values (1),(2),(3),(4)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: values__tmp__table__1
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: UDFToInteger(tmp_values_col1) (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.testacid1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.testacid1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table testacid1 values (1),(2),(3),(4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@testacid1
+POSTHOOK: query: insert into table testacid1 values (1),(2),(3),(4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@testacid1
+POSTHOOK: Lineage: testacid1.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+hive.vectorized.execution.enabled=true
+PREHOOK: query: explain
+select count(1) from testacid1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1) from testacid1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testacid1
+            Statistics: Num rows: 4 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 4 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1) from testacid1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testacid1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from testacid1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testacid1
+#### A masked pattern was here ####
+4