diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 1cc0104da5..5eb696b328 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -584,6 +584,7 @@ minillaplocal.query.files=acid_globallimit.q,\ smb_cache.q,\ special_character_in_tabnames_1.q,\ sqlmerge.q,\ + sqlmerge_type2_scd.q,\ stats_based_fetch_decision.q,\ subquery_notin.q,\ subquery_nested_subquery.q, \ diff --git ql/src/test/queries/clientpositive/sqlmerge.q ql/src/test/queries/clientpositive/sqlmerge.q index deaf91e37f..3043e94aa6 100644 --- ql/src/test/queries/clientpositive/sqlmerge.q +++ ql/src/test/queries/clientpositive/sqlmerge.q @@ -8,11 +8,17 @@ create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false'); --expect a cardinality check because there is update and hive.merge.cardinality.check=true by default -explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN MATCHED AND s.a > 8 THEN DELETE WHEN MATCHED THEN UPDATE SET b = 7 WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b); +--expect a cardinality check because there is update and hive.merge.cardinality.check=true +--this is to verify the plan when using inner join (vs usual ROJ) +explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 9 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 11; + --now we expect no cardinality check since only have insert clause explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b); diff --git ql/src/test/queries/clientpositive/sqlmerge_type2_scd.q ql/src/test/queries/clientpositive/sqlmerge_type2_scd.q new file mode 100644 index 0000000000..5add32ed07 --- /dev/null +++ ql/src/test/queries/clientpositive/sqlmerge_type2_scd.q @@ -0,0 +1,173 @@ +--Some tests for Type2 SCD (slowly changing dimension) using Merge + + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.explain.user=false; +set hive.mapred.mode=nonstrict; +set hive.enforce.bucketing=true; +set hive.merge.cardinality.check=true; +--set hive.exec.dynamic.partition=true; + +drop database if exists merge_test cascade; +create database merge_test; +use merge_test; + +--Only needed for current trunk builds. + +--Basic setup for the staging data. +create table invoice_initial_stage ( + orderkey string, partkey string, quantity float, total float +); +insert into invoice_initial_stage values ('I-122-1023', 'P-328-382', 1, 1.3),('I-122-1023', 'P-888-382', 3, 1.3),('I-122-1024', 'P-234-123', 5, 5.3); + +create table part_initial_stage ( + partkey string, list_price float +); +insert into part_initial_stage values ('P-328-382', 1.5),('P-888-382', 1.8),('P-234-123', 6.0),('P-234-382', 9.0),('P-943-382', 10.0); + +--The part dimension table. +--The view hides the source system keys from end users. +create table part_0 ( + part_sk string, part_foreign_pk string, list_price float +) +CLUSTERED BY (part_foreign_pk) INTO 11 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true"); +insert into part_0 (part_sk, part_foreign_pk, list_price) + select + reflect("java.util.UUID", "randomUUID"), partkey, list_price + from + part_initial_stage; +create view part as select part_sk, list_price from part_0; + +--Create the orders summary table. +create table orders_0 ( + order_sk string, order_foreign_pk string, total_sale float, discount_rate float +) +CLUSTERED BY (order_foreign_pk) INTO 9 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true"); +insert into orders_0 (order_sk, order_foreign_pk, total_sale, discount_rate) + select + reflect("java.util.UUID", "randomUUID"), orderkey, sum(quantity*total), + 1 - sum(quantity*total) / sum(quantity*list_price) + from + invoice_initial_stage, part_0 + where + partkey = part_foreign_pk + group by + orderkey; +create view orders as select order_sk, total_sale, discount_rate from orders_0; + +--Now the invoice fact table and view. +create table invoice_0 ( + invoice_item_id string, order_sk string, invoice_foreign_orderkey string, + invoice_foreign_partkey string, part_sk string, quantity float, total float +) +CLUSTERED BY (invoice_foreign_orderkey) INTO 7 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true"); +insert into invoice_0 (invoice_item_id, order_sk, invoice_foreign_orderkey, invoice_foreign_partkey, part_sk, quantity, total) + select + reflect("java.util.UUID", "randomUUID"), order_sk, orderkey, + partkey, part_sk, quantity, total + from + orders_0, part_0, invoice_initial_stage + where + order_foreign_pk = orderkey and part_foreign_pk = partkey; +create view invoice as select order_sk, part_sk from invoice_0; + +--The second data load. +create table invoice_new_stage ( + orderkey string, partkey string, quantity float, total float +); +insert into invoice_new_stage values ('I-122-1025', 'P-234-382', 2, 8.3),('I-122-1025', 'P-943-382', 4, 9.5); +create table part_correction_stage ( + partkey string, list_price float +); +insert into part_correction_stage values ('P-888-382', 2.0); + +analyze table part_0 compute statistics; +analyze table part_0 compute statistics for columns; +analyze table part_correction_stage compute statistics; +analyze table part_correction_stage compute statistics for columns; + +--Update the part dimension table based on the correction. +--Here we are doing a Type 1 Update (Overwrite). +--This invalidates the orders aggregate. +explain +merge into part_0 +using ( + select + part_foreign_pk, part_correction_stage.list_price as updated_price + from + part_0 join part_correction_stage on part_0.part_foreign_pk = part_correction_stage.partkey +) sub +on + (sub.part_foreign_pk = part_0.part_foreign_pk) +when matched then + update set list_price = sub.updated_price; + +merge into part_0 +using ( + select + part_foreign_pk, part_correction_stage.list_price as updated_price + from + part_0 join part_correction_stage on part_0.part_foreign_pk = part_correction_stage.partkey +) sub +on + (sub.part_foreign_pk = part_0.part_foreign_pk) +when matched then + update set list_price = sub.updated_price; + +analyze table orders_0 compute statistics; +analyze table orders_0 compute statistics for columns; +analyze table invoice_0 compute statistics; +analyze table invoice_0 compute statistics for columns; + +--Fix the discount rate in historical aggregates. +explain +merge into orders_0 +using ( + select + invoice_foreign_orderkey, + 1 - sum(quantity*total) / sum(quantity*list_price) as dr + from + invoice_0 join part_0 on invoice_foreign_partkey = part_foreign_pk + group by + invoice_foreign_orderkey +) sub +on + (order_foreign_pk = invoice_foreign_orderkey) +when matched then + update set discount_rate = sub.dr; + +merge into orders_0 +using ( + select + invoice_foreign_orderkey, + 1 - sum(quantity*total) / sum(quantity*list_price) as dr + from + invoice_0 join part_0 on invoice_foreign_partkey = part_foreign_pk + group by + invoice_foreign_orderkey +) sub +on + (order_foreign_pk = invoice_foreign_orderkey) +when matched then + update set discount_rate = sub.dr; + +--Add in the new records, starting with orders. +insert into orders_0 (order_foreign_pk, total_sale, discount_rate) +select + orderkey, sum(quantity*total), + 1 - sum(quantity*total) / sum(quantity*list_price) +from + invoice_new_stage, part_0 +where + partkey = part_foreign_pk +group by + orderkey; + +insert into invoice_0 (order_sk, invoice_foreign_orderkey, invoice_foreign_partkey, part_sk, quantity, total) +select + order_sk, orderkey, partkey, part_sk, quantity, total +from + orders_0, part_0, invoice_new_stage +where + order_foreign_pk = orderkey and part_foreign_pk = partkey; diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 562d4440ba..c07ecadbf8 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -270,6 +270,220 @@ STAGE PLANS: Stats-Aggr Operator PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 9 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 11 +PREHOOK: type: QUERY +POSTHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 9 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 11 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: ROW__ID (type: struct) + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((_col5 > 9) and (_col0 = _col5)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col4 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((_col5 <= 9) and (_col0 = _col5)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col4 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col0 = _col5) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col4 (type: struct) + outputColumnNames: _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + keys: _col4 (type: struct) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: DELETE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 11 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-7 + Stats-Aggr Operator + +PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) PREHOOK: type: QUERY POSTHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a diff --git ql/src/test/results/clientpositive/llap/sqlmerge_type2_scd.q.out ql/src/test/results/clientpositive/llap/sqlmerge_type2_scd.q.out new file mode 100644 index 0000000000..e46c8cff11 --- /dev/null +++ ql/src/test/results/clientpositive/llap/sqlmerge_type2_scd.q.out @@ -0,0 +1,957 @@ +PREHOOK: query: drop database if exists merge_test cascade +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists merge_test cascade +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database merge_test +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:merge_test +POSTHOOK: query: create database merge_test +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:merge_test +PREHOOK: query: use merge_test +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:merge_test +POSTHOOK: query: use merge_test +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:merge_test +PREHOOK: query: create table invoice_initial_stage ( + orderkey string, partkey string, quantity float, total float +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@invoice_initial_stage +POSTHOOK: query: create table invoice_initial_stage ( + orderkey string, partkey string, quantity float, total float +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@invoice_initial_stage +PREHOOK: query: insert into invoice_initial_stage values ('I-122-1023', 'P-328-382', 1, 1.3),('I-122-1023', 'P-888-382', 3, 1.3),('I-122-1024', 'P-234-123', 5, 5.3) +PREHOOK: type: QUERY +PREHOOK: Output: merge_test@invoice_initial_stage +POSTHOOK: query: insert into invoice_initial_stage values ('I-122-1023', 'P-328-382', 1, 1.3),('I-122-1023', 'P-888-382', 3, 1.3),('I-122-1024', 'P-234-123', 5, 5.3) +POSTHOOK: type: QUERY +POSTHOOK: Output: merge_test@invoice_initial_stage +POSTHOOK: Lineage: invoice_initial_stage.orderkey SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: invoice_initial_stage.partkey SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: invoice_initial_stage.quantity EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: invoice_initial_stage.total EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: create table part_initial_stage ( + partkey string, list_price float +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@part_initial_stage +POSTHOOK: query: create table part_initial_stage ( + partkey string, list_price float +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@part_initial_stage +PREHOOK: query: insert into part_initial_stage values ('P-328-382', 1.5),('P-888-382', 1.8),('P-234-123', 6.0),('P-234-382', 9.0),('P-943-382', 10.0) +PREHOOK: type: QUERY +PREHOOK: Output: merge_test@part_initial_stage +POSTHOOK: query: insert into part_initial_stage values ('P-328-382', 1.5),('P-888-382', 1.8),('P-234-123', 6.0),('P-234-382', 9.0),('P-943-382', 10.0) +POSTHOOK: type: QUERY +POSTHOOK: Output: merge_test@part_initial_stage +POSTHOOK: Lineage: part_initial_stage.list_price EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: part_initial_stage.partkey SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table part_0 ( + part_sk string, part_foreign_pk string, list_price float +) +CLUSTERED BY (part_foreign_pk) INTO 11 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@part_0 +POSTHOOK: query: create table part_0 ( + part_sk string, part_foreign_pk string, list_price float +) +CLUSTERED BY (part_foreign_pk) INTO 11 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@part_0 +PREHOOK: query: insert into part_0 (part_sk, part_foreign_pk, list_price) + select + reflect("java.util.UUID", "randomUUID"), partkey, list_price + from + part_initial_stage +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@part_initial_stage +PREHOOK: Output: merge_test@part_0 +POSTHOOK: query: insert into part_0 (part_sk, part_foreign_pk, list_price) + select + reflect("java.util.UUID", "randomUUID"), partkey, list_price + from + part_initial_stage +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@part_initial_stage +POSTHOOK: Output: merge_test@part_0 +POSTHOOK: Lineage: part_0.list_price SIMPLE [(part_initial_stage)part_initial_stage.FieldSchema(name:list_price, type:float, comment:null), ] +POSTHOOK: Lineage: part_0.part_foreign_pk SIMPLE [(part_initial_stage)part_initial_stage.FieldSchema(name:partkey, type:string, comment:null), ] +POSTHOOK: Lineage: part_0.part_sk EXPRESSION [] +PREHOOK: query: create view part as select part_sk, list_price from part_0 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: merge_test@part_0 +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@part +POSTHOOK: query: create view part as select part_sk, list_price from part_0 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@part +POSTHOOK: Lineage: part.list_price SIMPLE [(part_0)part_0.FieldSchema(name:list_price, type:float, comment:null), ] +POSTHOOK: Lineage: part.part_sk SIMPLE [(part_0)part_0.FieldSchema(name:part_sk, type:string, comment:null), ] +PREHOOK: query: create table orders_0 ( + order_sk string, order_foreign_pk string, total_sale float, discount_rate float +) +CLUSTERED BY (order_foreign_pk) INTO 9 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@orders_0 +POSTHOOK: query: create table orders_0 ( + order_sk string, order_foreign_pk string, total_sale float, discount_rate float +) +CLUSTERED BY (order_foreign_pk) INTO 9 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@orders_0 +PREHOOK: query: insert into orders_0 (order_sk, order_foreign_pk, total_sale, discount_rate) + select + reflect("java.util.UUID", "randomUUID"), orderkey, sum(quantity*total), + 1 - sum(quantity*total) / sum(quantity*list_price) + from + invoice_initial_stage, part_0 + where + partkey = part_foreign_pk + group by + orderkey +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@invoice_initial_stage +PREHOOK: Input: merge_test@part_0 +PREHOOK: Output: merge_test@orders_0 +POSTHOOK: query: insert into orders_0 (order_sk, order_foreign_pk, total_sale, discount_rate) + select + reflect("java.util.UUID", "randomUUID"), orderkey, sum(quantity*total), + 1 - sum(quantity*total) / sum(quantity*list_price) + from + invoice_initial_stage, part_0 + where + partkey = part_foreign_pk + group by + orderkey +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@invoice_initial_stage +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Output: merge_test@orders_0 +POSTHOOK: Lineage: orders_0.discount_rate EXPRESSION [(invoice_initial_stage)invoice_initial_stage.FieldSchema(name:quantity, type:float, comment:null), (invoice_initial_stage)invoice_initial_stage.FieldSchema(name:total, type:float, comment:null), (part_0)part_0.FieldSchema(name:list_price, type:float, comment:null), ] +POSTHOOK: Lineage: orders_0.order_foreign_pk SIMPLE [(invoice_initial_stage)invoice_initial_stage.FieldSchema(name:orderkey, type:string, comment:null), ] +POSTHOOK: Lineage: orders_0.order_sk EXPRESSION [] +POSTHOOK: Lineage: orders_0.total_sale EXPRESSION [(invoice_initial_stage)invoice_initial_stage.FieldSchema(name:quantity, type:float, comment:null), (invoice_initial_stage)invoice_initial_stage.FieldSchema(name:total, type:float, comment:null), ] +PREHOOK: query: create view orders as select order_sk, total_sale, discount_rate from orders_0 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: merge_test@orders_0 +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@orders +POSTHOOK: query: create view orders as select order_sk, total_sale, discount_rate from orders_0 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: merge_test@orders_0 +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@orders +POSTHOOK: Lineage: orders.discount_rate SIMPLE [(orders_0)orders_0.FieldSchema(name:discount_rate, type:float, comment:null), ] +POSTHOOK: Lineage: orders.order_sk SIMPLE [(orders_0)orders_0.FieldSchema(name:order_sk, type:string, comment:null), ] +POSTHOOK: Lineage: orders.total_sale SIMPLE [(orders_0)orders_0.FieldSchema(name:total_sale, type:float, comment:null), ] +PREHOOK: query: create table invoice_0 ( + invoice_item_id string, order_sk string, invoice_foreign_orderkey string, + invoice_foreign_partkey string, part_sk string, quantity float, total float +) +CLUSTERED BY (invoice_foreign_orderkey) INTO 7 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@invoice_0 +POSTHOOK: query: create table invoice_0 ( + invoice_item_id string, order_sk string, invoice_foreign_orderkey string, + invoice_foreign_partkey string, part_sk string, quantity float, total float +) +CLUSTERED BY (invoice_foreign_orderkey) INTO 7 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@invoice_0 +Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: insert into invoice_0 (invoice_item_id, order_sk, invoice_foreign_orderkey, invoice_foreign_partkey, part_sk, quantity, total) + select + reflect("java.util.UUID", "randomUUID"), order_sk, orderkey, + partkey, part_sk, quantity, total + from + orders_0, part_0, invoice_initial_stage + where + order_foreign_pk = orderkey and part_foreign_pk = partkey +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@invoice_initial_stage +PREHOOK: Input: merge_test@orders_0 +PREHOOK: Input: merge_test@part_0 +PREHOOK: Output: merge_test@invoice_0 +POSTHOOK: query: insert into invoice_0 (invoice_item_id, order_sk, invoice_foreign_orderkey, invoice_foreign_partkey, part_sk, quantity, total) + select + reflect("java.util.UUID", "randomUUID"), order_sk, orderkey, + partkey, part_sk, quantity, total + from + orders_0, part_0, invoice_initial_stage + where + order_foreign_pk = orderkey and part_foreign_pk = partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@invoice_initial_stage +POSTHOOK: Input: merge_test@orders_0 +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Output: merge_test@invoice_0 +POSTHOOK: Lineage: invoice_0.invoice_foreign_orderkey SIMPLE [(invoice_initial_stage)invoice_initial_stage.FieldSchema(name:orderkey, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.invoice_foreign_partkey SIMPLE [(invoice_initial_stage)invoice_initial_stage.FieldSchema(name:partkey, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.invoice_item_id EXPRESSION [] +POSTHOOK: Lineage: invoice_0.order_sk SIMPLE [(orders_0)orders_0.FieldSchema(name:order_sk, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.part_sk SIMPLE [(part_0)part_0.FieldSchema(name:part_sk, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.quantity SIMPLE [(invoice_initial_stage)invoice_initial_stage.FieldSchema(name:quantity, type:float, comment:null), ] +POSTHOOK: Lineage: invoice_0.total SIMPLE [(invoice_initial_stage)invoice_initial_stage.FieldSchema(name:total, type:float, comment:null), ] +PREHOOK: query: create view invoice as select order_sk, part_sk from invoice_0 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: merge_test@invoice_0 +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@invoice +POSTHOOK: query: create view invoice as select order_sk, part_sk from invoice_0 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: merge_test@invoice_0 +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@invoice +POSTHOOK: Lineage: invoice.order_sk SIMPLE [(invoice_0)invoice_0.FieldSchema(name:order_sk, type:string, comment:null), ] +POSTHOOK: Lineage: invoice.part_sk SIMPLE [(invoice_0)invoice_0.FieldSchema(name:part_sk, type:string, comment:null), ] +PREHOOK: query: create table invoice_new_stage ( + orderkey string, partkey string, quantity float, total float +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@invoice_new_stage +POSTHOOK: query: create table invoice_new_stage ( + orderkey string, partkey string, quantity float, total float +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@invoice_new_stage +PREHOOK: query: insert into invoice_new_stage values ('I-122-1025', 'P-234-382', 2, 8.3),('I-122-1025', 'P-943-382', 4, 9.5) +PREHOOK: type: QUERY +PREHOOK: Output: merge_test@invoice_new_stage +POSTHOOK: query: insert into invoice_new_stage values ('I-122-1025', 'P-234-382', 2, 8.3),('I-122-1025', 'P-943-382', 4, 9.5) +POSTHOOK: type: QUERY +POSTHOOK: Output: merge_test@invoice_new_stage +POSTHOOK: Lineage: invoice_new_stage.orderkey SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: invoice_new_stage.partkey SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: invoice_new_stage.quantity EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: invoice_new_stage.total EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: create table part_correction_stage ( + partkey string, list_price float +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:merge_test +PREHOOK: Output: merge_test@part_correction_stage +POSTHOOK: query: create table part_correction_stage ( + partkey string, list_price float +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:merge_test +POSTHOOK: Output: merge_test@part_correction_stage +PREHOOK: query: insert into part_correction_stage values ('P-888-382', 2.0) +PREHOOK: type: QUERY +PREHOOK: Output: merge_test@part_correction_stage +POSTHOOK: query: insert into part_correction_stage values ('P-888-382', 2.0) +POSTHOOK: type: QUERY +POSTHOOK: Output: merge_test@part_correction_stage +POSTHOOK: Lineage: part_correction_stage.list_price EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: part_correction_stage.partkey SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: analyze table part_0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@part_0 +PREHOOK: Output: merge_test@part_0 +POSTHOOK: query: analyze table part_0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Output: merge_test@part_0 +PREHOOK: query: analyze table part_0 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@part_0 +#### A masked pattern was here #### +PREHOOK: Output: merge_test@part_0 +POSTHOOK: query: analyze table part_0 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@part_0 +#### A masked pattern was here #### +POSTHOOK: Output: merge_test@part_0 +PREHOOK: query: analyze table part_correction_stage compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@part_correction_stage +PREHOOK: Output: merge_test@part_correction_stage +POSTHOOK: query: analyze table part_correction_stage compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@part_correction_stage +POSTHOOK: Output: merge_test@part_correction_stage +PREHOOK: query: analyze table part_correction_stage compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@part_correction_stage +#### A masked pattern was here #### +PREHOOK: Output: merge_test@part_correction_stage +POSTHOOK: query: analyze table part_correction_stage compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@part_correction_stage +#### A masked pattern was here #### +POSTHOOK: Output: merge_test@part_correction_stage +PREHOOK: query: explain +merge into part_0 +using ( + select + part_foreign_pk, part_correction_stage.list_price as updated_price + from + part_0 join part_correction_stage on part_0.part_foreign_pk = part_correction_stage.partkey +) sub +on + (sub.part_foreign_pk = part_0.part_foreign_pk) +when matched then + update set list_price = sub.updated_price +PREHOOK: type: QUERY +POSTHOOK: query: explain +merge into part_0 +using ( + select + part_foreign_pk, part_correction_stage.list_price as updated_price + from + part_0 join part_correction_stage on part_0.part_foreign_pk = part_correction_stage.partkey +) sub +on + (sub.part_foreign_pk = part_0.part_foreign_pk) +when matched then + update set list_price = sub.updated_price +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_0 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: part_foreign_pk is not null (type: boolean) + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: part_foreign_pk (type: string) + sort order: + + Map-reduce partition columns: part_foreign_pk (type: string) + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: part_correction_stage + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: partkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: partkey (type: string) + sort order: + + Map-reduce partition columns: partkey (type: string) + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: list_price (type: float) + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part_0 + Statistics: Num rows: 5 Data size: 1065 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: part_foreign_pk is not null (type: boolean) + Statistics: Num rows: 5 Data size: 1065 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: part_foreign_pk (type: string) + sort order: + + Map-reduce partition columns: part_foreign_pk (type: string) + Statistics: Num rows: 5 Data size: 1065 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: part_sk (type: string), ROW__ID (type: struct) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 part_foreign_pk (type: string) + 1 partkey (type: string) + outputColumnNames: _col1, _col7 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col7 (type: float) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 part_foreign_pk (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col6 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct) + outputColumnNames: _col5 + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col5 (type: struct) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: (_col6 = _col1) (type: boolean) + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct), _col0 (type: string), _col1 (type: string), _col7 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 293 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 293 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: float) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: merge_test.merge_tmp_table + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 293 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 293 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: merge_test.part_0 + Write Type: UPDATE + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: merge_test.merge_tmp_table + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: merge_test.part_0 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: merge into part_0 +using ( + select + part_foreign_pk, part_correction_stage.list_price as updated_price + from + part_0 join part_correction_stage on part_0.part_foreign_pk = part_correction_stage.partkey +) sub +on + (sub.part_foreign_pk = part_0.part_foreign_pk) +when matched then + update set list_price = sub.updated_price +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@part_0 +PREHOOK: Input: merge_test@part_correction_stage +PREHOOK: Output: merge_test@merge_tmp_table +PREHOOK: Output: merge_test@part_0 +POSTHOOK: query: merge into part_0 +using ( + select + part_foreign_pk, part_correction_stage.list_price as updated_price + from + part_0 join part_correction_stage on part_0.part_foreign_pk = part_correction_stage.partkey +) sub +on + (sub.part_foreign_pk = part_0.part_foreign_pk) +when matched then + update set list_price = sub.updated_price +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Input: merge_test@part_correction_stage +POSTHOOK: Output: merge_test@merge_tmp_table +POSTHOOK: Output: merge_test@part_0 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(part_0)part_0.FieldSchema(name:ROW__ID, type:struct, comment:), ] +PREHOOK: query: analyze table orders_0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@orders_0 +PREHOOK: Output: merge_test@orders_0 +POSTHOOK: query: analyze table orders_0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@orders_0 +POSTHOOK: Output: merge_test@orders_0 +PREHOOK: query: analyze table orders_0 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@orders_0 +#### A masked pattern was here #### +PREHOOK: Output: merge_test@orders_0 +POSTHOOK: query: analyze table orders_0 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@orders_0 +#### A masked pattern was here #### +POSTHOOK: Output: merge_test@orders_0 +PREHOOK: query: analyze table invoice_0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@invoice_0 +PREHOOK: Output: merge_test@invoice_0 +POSTHOOK: query: analyze table invoice_0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@invoice_0 +POSTHOOK: Output: merge_test@invoice_0 +PREHOOK: query: analyze table invoice_0 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@invoice_0 +#### A masked pattern was here #### +PREHOOK: Output: merge_test@invoice_0 +POSTHOOK: query: analyze table invoice_0 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@invoice_0 +#### A masked pattern was here #### +POSTHOOK: Output: merge_test@invoice_0 +PREHOOK: query: explain +merge into orders_0 +using ( + select + invoice_foreign_orderkey, + 1 - sum(quantity*total) / sum(quantity*list_price) as dr + from + invoice_0 join part_0 on invoice_foreign_partkey = part_foreign_pk + group by + invoice_foreign_orderkey +) sub +on + (order_foreign_pk = invoice_foreign_orderkey) +when matched then + update set discount_rate = sub.dr +PREHOOK: type: QUERY +POSTHOOK: query: explain +merge into orders_0 +using ( + select + invoice_foreign_orderkey, + 1 - sum(quantity*total) / sum(quantity*list_price) as dr + from + invoice_0 join part_0 on invoice_foreign_partkey = part_foreign_pk + group by + invoice_foreign_orderkey +) sub +on + (order_foreign_pk = invoice_foreign_orderkey) +when matched then + update set discount_rate = sub.dr +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: invoice_0 + Statistics: Num rows: 3 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (invoice_foreign_partkey is not null and invoice_foreign_orderkey is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: invoice_foreign_partkey (type: string) + sort order: + + Map-reduce partition columns: invoice_foreign_partkey (type: string) + Statistics: Num rows: 3 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: invoice_foreign_orderkey (type: string), quantity (type: float), total (type: float) + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + TableScan + alias: part_0 + Statistics: Num rows: 5 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: part_foreign_pk is not null (type: boolean) + Statistics: Num rows: 5 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: part_foreign_pk (type: string) + sort order: + + Map-reduce partition columns: part_foreign_pk (type: string) + Statistics: Num rows: 5 Data size: 477 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: list_price (type: float) + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 8 + Map Operator Tree: + TableScan + alias: orders_0 + Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: order_foreign_pk is not null (type: boolean) + Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: order_foreign_pk (type: string) + sort order: + + Map-reduce partition columns: order_foreign_pk (type: string) + Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: order_sk (type: string), total_sale (type: float), ROW__ID (type: struct) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 invoice_foreign_partkey (type: string) + 1 part_foreign_pk (type: string) + outputColumnNames: _col2, _col5, _col6, _col12 + Statistics: Num rows: 3 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum((_col5 * _col6)), sum((_col5 * _col12)) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), (1 - (_col1 / _col2)) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 order_foreign_pk (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 = _col7) (type: boolean) + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: struct) + outputColumnNames: _col6 + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col6 (type: struct) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: (_col1 = _col7) (type: boolean) + Statistics: Num rows: 1 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: float), _col8 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: float), _col4 (type: double) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: merge_test.merge_tmp_table + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: float), UDFToFloat(VALUE._col3) (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: merge_test.orders_0 + Write Type: UPDATE + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: merge_test.merge_tmp_table + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: merge_test.orders_0 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: merge into orders_0 +using ( + select + invoice_foreign_orderkey, + 1 - sum(quantity*total) / sum(quantity*list_price) as dr + from + invoice_0 join part_0 on invoice_foreign_partkey = part_foreign_pk + group by + invoice_foreign_orderkey +) sub +on + (order_foreign_pk = invoice_foreign_orderkey) +when matched then + update set discount_rate = sub.dr +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@invoice_0 +PREHOOK: Input: merge_test@orders_0 +PREHOOK: Input: merge_test@part_0 +PREHOOK: Output: merge_test@merge_tmp_table +PREHOOK: Output: merge_test@orders_0 +POSTHOOK: query: merge into orders_0 +using ( + select + invoice_foreign_orderkey, + 1 - sum(quantity*total) / sum(quantity*list_price) as dr + from + invoice_0 join part_0 on invoice_foreign_partkey = part_foreign_pk + group by + invoice_foreign_orderkey +) sub +on + (order_foreign_pk = invoice_foreign_orderkey) +when matched then + update set discount_rate = sub.dr +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@invoice_0 +POSTHOOK: Input: merge_test@orders_0 +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Output: merge_test@merge_tmp_table +POSTHOOK: Output: merge_test@orders_0 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(orders_0)orders_0.FieldSchema(name:ROW__ID, type:struct, comment:), ] +PREHOOK: query: insert into orders_0 (order_foreign_pk, total_sale, discount_rate) +select + orderkey, sum(quantity*total), + 1 - sum(quantity*total) / sum(quantity*list_price) +from + invoice_new_stage, part_0 +where + partkey = part_foreign_pk +group by + orderkey +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@invoice_new_stage +PREHOOK: Input: merge_test@part_0 +PREHOOK: Output: merge_test@orders_0 +POSTHOOK: query: insert into orders_0 (order_foreign_pk, total_sale, discount_rate) +select + orderkey, sum(quantity*total), + 1 - sum(quantity*total) / sum(quantity*list_price) +from + invoice_new_stage, part_0 +where + partkey = part_foreign_pk +group by + orderkey +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@invoice_new_stage +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Output: merge_test@orders_0 +POSTHOOK: Lineage: orders_0.discount_rate EXPRESSION [(invoice_new_stage)invoice_new_stage.FieldSchema(name:quantity, type:float, comment:null), (invoice_new_stage)invoice_new_stage.FieldSchema(name:total, type:float, comment:null), (part_0)part_0.FieldSchema(name:list_price, type:float, comment:null), ] +POSTHOOK: Lineage: orders_0.order_foreign_pk SIMPLE [(invoice_new_stage)invoice_new_stage.FieldSchema(name:orderkey, type:string, comment:null), ] +POSTHOOK: Lineage: orders_0.order_sk SIMPLE [] +POSTHOOK: Lineage: orders_0.total_sale EXPRESSION [(invoice_new_stage)invoice_new_stage.FieldSchema(name:quantity, type:float, comment:null), (invoice_new_stage)invoice_new_stage.FieldSchema(name:total, type:float, comment:null), ] +Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: insert into invoice_0 (order_sk, invoice_foreign_orderkey, invoice_foreign_partkey, part_sk, quantity, total) +select + order_sk, orderkey, partkey, part_sk, quantity, total +from + orders_0, part_0, invoice_new_stage +where + order_foreign_pk = orderkey and part_foreign_pk = partkey +PREHOOK: type: QUERY +PREHOOK: Input: merge_test@invoice_new_stage +PREHOOK: Input: merge_test@orders_0 +PREHOOK: Input: merge_test@part_0 +PREHOOK: Output: merge_test@invoice_0 +POSTHOOK: query: insert into invoice_0 (order_sk, invoice_foreign_orderkey, invoice_foreign_partkey, part_sk, quantity, total) +select + order_sk, orderkey, partkey, part_sk, quantity, total +from + orders_0, part_0, invoice_new_stage +where + order_foreign_pk = orderkey and part_foreign_pk = partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: merge_test@invoice_new_stage +POSTHOOK: Input: merge_test@orders_0 +POSTHOOK: Input: merge_test@part_0 +POSTHOOK: Output: merge_test@invoice_0 +POSTHOOK: Lineage: invoice_0.invoice_foreign_orderkey SIMPLE [(invoice_new_stage)invoice_new_stage.FieldSchema(name:orderkey, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.invoice_foreign_partkey SIMPLE [(invoice_new_stage)invoice_new_stage.FieldSchema(name:partkey, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.invoice_item_id SIMPLE [] +POSTHOOK: Lineage: invoice_0.order_sk SIMPLE [(orders_0)orders_0.FieldSchema(name:order_sk, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.part_sk SIMPLE [(part_0)part_0.FieldSchema(name:part_sk, type:string, comment:null), ] +POSTHOOK: Lineage: invoice_0.quantity SIMPLE [(invoice_new_stage)invoice_new_stage.FieldSchema(name:quantity, type:float, comment:null), ] +POSTHOOK: Lineage: invoice_0.total SIMPLE [(invoice_new_stage)invoice_new_stage.FieldSchema(name:total, type:float, comment:null), ]