diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index ac249ed..4cbac6b 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -69,6 +69,7 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\ smb_mapjoin_8.q minitez.query.files.shared=acid_globallimit.q,\ + acid_mapjoin.q,\ deleteAnalyze.q,\ empty_join.q,\ alter_merge_2_orc.q,\ diff --git ql/src/test/queries/clientpositive/acid_mapjoin.q ql/src/test/queries/clientpositive/acid_mapjoin.q index 5eee6e7..3ebaba4 100644 --- ql/src/test/queries/clientpositive/acid_mapjoin.q +++ ql/src/test/queries/clientpositive/acid_mapjoin.q @@ -1,7 +1,3 @@ -set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; - set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; @@ -10,13 +6,64 @@ drop table if exists acid2; create table acid1 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true"); create table acid2 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true"); +create table acid3 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true"); +create table nonacid1 (key int, value string); insert into acid1 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h'); -insert into acid2 values (1,'a'),(3,'c'),(5,'e'),(7,'g'); +insert into acid2 values (1, 'a'), (3, 'c'), (5, 'e'), (7, 'g'); +insert into acid3 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'); +insert into nonacid1 values (1, 'a'), (2, 'b'); + +-- acid tables with merge join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key; +select count(*) from acid1 join acid2 on acid1.key = acid2.key; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +-- acid tables with map join explain select count(*) from acid1 join acid2 on acid1.key = acid2.key; select count(*) from acid1 join acid2 on acid1.key = acid2.key; +-- should have the same result after minor and major compaction +alter table acid1 compact 'minor'; +alter table acid2 compact 'minor'; + +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key; +select count(*) from acid1 join acid2 on acid1.key = acid2.key; + +alter table acid1 compact 'major'; +alter table acid2 compact 'major'; + +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key; +select count(*) from acid1 join acid2 on acid1.key = acid2.key; + +set hive.strict.checks.cartesian.product=false; + +-- acid table in subquery +explain +select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key; +select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key; + +-- multi-way join +explain +select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key; +select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key; + +-- one side is bucketed acid table, while the other side is non-bucketed regular table +explain +select count(*) from acid1 a join nonacid1 b on a.key = b.key; +select count(*) from acid1 a join nonacid1 b on a.key = b.key; + +-- join on non-bucketed column +explain +select count(*) from acid1 a join acid2 b on a.value = b.value; +select count(*) from acid1 a join acid2 b on a.value = b.value; + drop table acid1; drop table acid2; diff --git ql/src/test/results/clientpositive/acid_mapjoin.q.out ql/src/test/results/clientpositive/acid_mapjoin.q.out index 036c56c..187f05b 100644 --- ql/src/test/results/clientpositive/acid_mapjoin.q.out +++ ql/src/test/results/clientpositive/acid_mapjoin.q.out @@ -22,6 +22,22 @@ POSTHOOK: query: create table acid2 (key int, value string) clustered by (key) i POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@acid2 +PREHOOK: query: create table acid3 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid3 +POSTHOOK: query: create table acid3 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid3 +PREHOOK: query: create table nonacid1 (key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nonacid1 +POSTHOOK: query: create table nonacid1 (key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nonacid1 PREHOOK: query: insert into acid1 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__1 @@ -32,16 +48,251 @@ POSTHOOK: Input: default@values__tmp__table__1 POSTHOOK: Output: default@acid1 POSTHOOK: Lineage: acid1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: acid1.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: insert into acid2 values (1,'a'),(3,'c'),(5,'e'),(7,'g') +PREHOOK: query: insert into acid2 values (1, 'a'), (3, 'c'), (5, 'e'), (7, 'g') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__2 PREHOOK: Output: default@acid2 -POSTHOOK: query: insert into acid2 values (1,'a'),(3,'c'),(5,'e'),(7,'g') +POSTHOOK: query: insert into acid2 values (1, 'a'), (3, 'c'), (5, 'e'), (7, 'g') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@acid2 POSTHOOK: Lineage: acid2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: acid2.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into acid3 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@acid3 +POSTHOOK: query: insert into acid3 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@acid3 +POSTHOOK: Lineage: acid3.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid3.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into nonacid1 values (1, 'a'), (2, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@nonacid1 +POSTHOOK: query: insert into nonacid1 values (1, 'a'), (2, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@nonacid1 +POSTHOOK: Lineage: nonacid1.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: nonacid1.value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: -- acid tables with merge join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- acid tables with merge join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid1 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: acid2 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 343 Data size: 1376 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +PREHOOK: query: -- acid tables with map join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- acid tables with map join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:acid2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:acid2 + TableScan + alias: acid2 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: acid1 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 343 Data size: 1376 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +PREHOOK: query: -- should have the same result after minor and major compaction +alter table acid1 compact 'minor' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: -- should have the same result after minor and major compaction +alter table acid1 compact 'minor' +POSTHOOK: type: ALTERTABLE_COMPACT +PREHOOK: query: alter table acid2 compact 'minor' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: alter table acid2 compact 'minor' +POSTHOOK: type: ALTERTABLE_COMPACT PREHOOK: query: explain select count(*) from acid1 join acid2 on acid1.key = acid2.key PREHOOK: type: QUERY @@ -139,6 +390,563 @@ POSTHOOK: Input: default@acid1 POSTHOOK: Input: default@acid2 #### A masked pattern was here #### 4 +PREHOOK: query: alter table acid1 compact 'major' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: alter table acid1 compact 'major' +POSTHOOK: type: ALTERTABLE_COMPACT +PREHOOK: query: alter table acid2 compact 'major' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: alter table acid2 compact 'major' +POSTHOOK: type: ALTERTABLE_COMPACT +PREHOOK: query: explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:acid2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:acid2 + TableScan + alias: acid2 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: acid1 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 343 Data size: 1376 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- acid table in subquery +explain +select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- acid table in subquery +explain +select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:acid2 + Fetch Operator + limit: -1 + $hdt$_1:b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:acid2 + TableScan + alias: acid2 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + $hdt$_1:b + TableScan + alias: b + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: acid1 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 65208 Data size: 588435 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> _col1) (type: boolean) + Statistics: Num rows: 65208 Data size: 588435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 65208 Data size: 588435 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 71728 Data size: 647278 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +28 +PREHOOK: query: -- multi-way join +explain +select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: -- multi-way join +explain +select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + $hdt$_1:b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + $hdt$_1:b + TableScan + alias: b + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 209 Data size: 839 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 314 Data size: 1257 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 314 Data size: 1257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 314 Data size: 1257 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Statistics: Num rows: 690 Data size: 2765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +PREHOOK: Input: default@acid3 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +POSTHOOK: Input: default@acid3 +#### A masked pattern was here #### +2 +PREHOOK: query: -- one side is bucketed acid table, while the other side is non-bucketed regular table +explain +select count(*) from acid1 a join nonacid1 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- one side is bucketed acid table, while the other side is non-bucketed regular table +explain +select count(*) from acid1 a join nonacid1 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:b + TableScan + alias: b + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 312 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 343 Data size: 1376 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid1 a join nonacid1 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@nonacid1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 a join nonacid1 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@nonacid1 +#### A masked pattern was here #### +2 +PREHOOK: query: -- join on non-bucketed column +explain +select count(*) from acid1 a join acid2 b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: -- join on non-bucketed column +explain +select count(*) from acid1 a join acid2 b on a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:b + TableScan + alias: b + Statistics: Num rows: 8 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 8 Data size: 839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 839 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 12 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1251 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 13 Data size: 1376 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid1 a join acid2 b on a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 a join acid2 b on a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 PREHOOK: query: drop table acid1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@acid1 diff --git ql/src/test/results/clientpositive/tez/acid_mapjoin.q.out ql/src/test/results/clientpositive/tez/acid_mapjoin.q.out new file mode 100644 index 0000000..3fd436b --- /dev/null +++ ql/src/test/results/clientpositive/tez/acid_mapjoin.q.out @@ -0,0 +1,578 @@ +PREHOOK: query: drop table if exists acid1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists acid1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists acid2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists acid2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table acid1 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid1 +POSTHOOK: query: create table acid1 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid1 +PREHOOK: query: create table acid2 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid2 +POSTHOOK: query: create table acid2 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid2 +PREHOOK: query: create table acid3 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid3 +POSTHOOK: query: create table acid3 (key int, value string) clustered by (key) into 2 buckets stored as orc tblproperties ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid3 +PREHOOK: query: create table nonacid1 (key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nonacid1 +POSTHOOK: query: create table nonacid1 (key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nonacid1 +PREHOOK: query: insert into acid1 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@acid1 +POSTHOOK: query: insert into acid1 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@acid1 +POSTHOOK: Lineage: acid1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid1.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into acid2 values (1, 'a'), (3, 'c'), (5, 'e'), (7, 'g') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@acid2 +POSTHOOK: query: insert into acid2 values (1, 'a'), (3, 'c'), (5, 'e'), (7, 'g') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@acid2 +POSTHOOK: Lineage: acid2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid2.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into acid3 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@acid3 +POSTHOOK: query: insert into acid3 values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@acid3 +POSTHOOK: Lineage: acid3.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid3.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into nonacid1 values (1, 'a'), (2, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@nonacid1 +POSTHOOK: query: insert into nonacid1 values (1, 'a'), (2, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@nonacid1 +POSTHOOK: Lineage: nonacid1.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: nonacid1.value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: -- acid tables with merge join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- acid tables with merge join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_19] (rows=343 width=4) + Conds:RS_6._col0=RS_7._col0(Inner) + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=312 width=4) + Output:["_col0"] + Filter Operator [FIL_17] (rows=312 width=4) + predicate:key is not null + TableScan [TS_0] (rows=312 width=4) + default@acid1,acid1, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=158 width=4) + Output:["_col0"] + Filter Operator [FIL_18] (rows=158 width=4) + predicate:key is not null + TableScan [TS_3] (rows=158 width=4) + default@acid2,acid2, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +PREHOOK: query: -- acid tables with map join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- acid tables with map join +explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Map Join Operator [MAPJOIN_19] (rows=343 width=4) + Conds:SEL_2._col0=RS_7._col0(Inner),HybridGraceHashJoin:true + <-Map 3 [BROADCAST_EDGE] + BROADCAST [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=158 width=4) + Output:["_col0"] + Filter Operator [FIL_18] (rows=158 width=4) + predicate:key is not null + TableScan [TS_3] (rows=158 width=4) + default@acid2,acid2, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=312 width=4) + Output:["_col0"] + Filter Operator [FIL_17] (rows=312 width=4) + predicate:key is not null + TableScan [TS_0] (rows=312 width=4) + default@acid1,acid1, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +PREHOOK: query: -- should have the same result after minor and major compaction +alter table acid1 compact 'minor' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: -- should have the same result after minor and major compaction +alter table acid1 compact 'minor' +POSTHOOK: type: ALTERTABLE_COMPACT +PREHOOK: query: alter table acid2 compact 'minor' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: alter table acid2 compact 'minor' +POSTHOOK: type: ALTERTABLE_COMPACT +PREHOOK: query: explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Map Join Operator [MAPJOIN_19] (rows=343 width=4) + Conds:SEL_2._col0=RS_7._col0(Inner),HybridGraceHashJoin:true + <-Map 3 [BROADCAST_EDGE] + BROADCAST [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=158 width=4) + Output:["_col0"] + Filter Operator [FIL_18] (rows=158 width=4) + predicate:key is not null + TableScan [TS_3] (rows=158 width=4) + default@acid2,acid2, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=312 width=4) + Output:["_col0"] + Filter Operator [FIL_17] (rows=312 width=4) + predicate:key is not null + TableScan [TS_0] (rows=312 width=4) + default@acid1,acid1, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +PREHOOK: query: alter table acid1 compact 'major' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: alter table acid1 compact 'major' +POSTHOOK: type: ALTERTABLE_COMPACT +PREHOOK: query: alter table acid2 compact 'major' +PREHOOK: type: ALTERTABLE_COMPACT +POSTHOOK: query: alter table acid2 compact 'major' +POSTHOOK: type: ALTERTABLE_COMPACT +PREHOOK: query: explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Map Join Operator [MAPJOIN_19] (rows=343 width=4) + Conds:SEL_2._col0=RS_7._col0(Inner),HybridGraceHashJoin:true + <-Map 3 [BROADCAST_EDGE] + BROADCAST [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=158 width=4) + Output:["_col0"] + Filter Operator [FIL_18] (rows=158 width=4) + predicate:key is not null + TableScan [TS_3] (rows=158 width=4) + default@acid2,acid2, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=312 width=4) + Output:["_col0"] + Filter Operator [FIL_17] (rows=312 width=4) + predicate:key is not null + TableScan [TS_0] (rows=312 width=4) + default@acid1,acid1, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 join acid2 on acid1.key = acid2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: -- acid table in subquery +explain +select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- acid table in subquery +explain +select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Map Join Operator [MAPJOIN_28] (rows=54225 width=9) + Conds:SEL_9._col0=RS_14._col0(Inner),HybridGraceHashJoin:true + <-Map 4 [BROADCAST_EDGE] + BROADCAST [RS_14] + PartitionCols:_col0 + Select Operator [SEL_12] (rows=312 width=4) + Output:["_col0"] + Filter Operator [FIL_26] (rows=312 width=4) + predicate:key is not null + TableScan [TS_10] (rows=312 width=4) + default@acid1,b, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_9] (rows=49296 width=9) + Output:["_col0"] + Filter Operator [FIL_8] (rows=49296 width=9) + predicate:(_col0 <> _col1) + Map Join Operator [MAPJOIN_27] (rows=49296 width=9) + Conds:(Inner),Output:["_col0","_col1"] + <-Map 3 [BROADCAST_EDGE] + BROADCAST [RS_6] + Select Operator [SEL_4] (rows=158 width=4) + Output:["_col0"] + Filter Operator [FIL_25] (rows=158 width=4) + predicate:key is not null + TableScan [TS_2] (rows=158 width=4) + default@acid2,acid2, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_1] (rows=312 width=4) + Output:["_col0"] + TableScan [TS_0] (rows=312 width=4) + default@acid1,acid1, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + +Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from (select * from acid1 join acid2 where acid1.key <> acid2.key) a join acid1 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +28 +PREHOOK: query: -- multi-way join +explain +select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: -- multi-way join +explain +select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_18] + Group By Operator [GBY_16] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 3 [SIMPLE_EDGE] + SHUFFLE [RS_15] + Group By Operator [GBY_14] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Map Join Operator [MAPJOIN_28] (rows=690 width=4) + Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=SEL_8._col0(Inner),HybridGraceHashJoin:true + <-Map 1 [BROADCAST_EDGE] + BROADCAST [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=312 width=4) + Output:["_col0"] + Filter Operator [FIL_25] (rows=312 width=4) + predicate:key is not null + TableScan [TS_0] (rows=312 width=4) + default@acid1,a, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 2 [BROADCAST_EDGE] + BROADCAST [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=158 width=4) + Output:["_col0"] + Filter Operator [FIL_26] (rows=158 width=4) + predicate:key is not null + TableScan [TS_3] (rows=158 width=4) + default@acid2,b, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_8] (rows=314 width=4) + Output:["_col0"] + Filter Operator [FIL_27] (rows=314 width=4) + predicate:key is not null + TableScan [TS_6] (rows=314 width=4) + default@acid3,c, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +PREHOOK: Input: default@acid3 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 a join acid2 b on a.key = b.key join acid3 c on a.key = c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +POSTHOOK: Input: default@acid3 +#### A masked pattern was here #### +2 +PREHOOK: query: -- one side is bucketed acid table, while the other side is non-bucketed regular table +explain +select count(*) from acid1 a join nonacid1 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- one side is bucketed acid table, while the other side is non-bucketed regular table +explain +select count(*) from acid1 a join nonacid1 b on a.key = b.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Map Join Operator [MAPJOIN_19] (rows=343 width=4) + Conds:SEL_2._col0=RS_7._col0(Inner),HybridGraceHashJoin:true + <-Map 3 [BROADCAST_EDGE] + BROADCAST [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=2 width=3) + Output:["_col0"] + Filter Operator [FIL_18] (rows=2 width=3) + predicate:key is not null + TableScan [TS_3] (rows=2 width=3) + default@nonacid1,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=312 width=4) + Output:["_col0"] + Filter Operator [FIL_17] (rows=312 width=4) + predicate:key is not null + TableScan [TS_0] (rows=312 width=4) + default@acid1,a, ACID table,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select count(*) from acid1 a join nonacid1 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@nonacid1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 a join nonacid1 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@nonacid1 +#### A masked pattern was here #### +2 +PREHOOK: query: -- join on non-bucketed column +explain +select count(*) from acid1 a join acid2 b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: -- join on non-bucketed column +explain +select count(*) from acid1 a join acid2 b on a.value = b.value +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Map Join Operator [MAPJOIN_19] (rows=13 width=105) + Conds:SEL_2._col0=RS_7._col0(Inner),HybridGraceHashJoin:true + <-Map 3 [BROADCAST_EDGE] + BROADCAST [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=6 width=105) + Output:["_col0"] + Filter Operator [FIL_18] (rows=6 width=105) + predicate:value is not null + TableScan [TS_3] (rows=6 width=105) + default@acid2,b, ACID table,Tbl:COMPLETE,Col:NONE,Output:["value"] + <-Select Operator [SEL_2] (rows=12 width=104) + Output:["_col0"] + Filter Operator [FIL_17] (rows=12 width=104) + predicate:value is not null + TableScan [TS_0] (rows=12 width=104) + default@acid1,a, ACID table,Tbl:COMPLETE,Col:NONE,Output:["value"] + +PREHOOK: query: select count(*) from acid1 a join acid2 b on a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@acid1 +PREHOOK: Input: default@acid2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid1 a join acid2 b on a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid1 +POSTHOOK: Input: default@acid2 +#### A masked pattern was here #### +4 +PREHOOK: query: drop table acid1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@acid1 +PREHOOK: Output: default@acid1 +POSTHOOK: query: drop table acid1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@acid1 +POSTHOOK: Output: default@acid1 +PREHOOK: query: drop table acid2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@acid2 +PREHOOK: Output: default@acid2 +POSTHOOK: query: drop table acid2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@acid2 +POSTHOOK: Output: default@acid2