diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q index 78e7515b68..71a7399692 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q @@ -16,7 +16,7 @@ create table srcpart_hour as select hr as hr, hr as hour from srcpart group by h create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr; create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr; --- single column, single key +-- single column, single key -- join a partitioned table to a non-partitioned table, static filter on the non-partitioned table EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set hive.spark.dynamic.partition.pruning=false; @@ -41,8 +41,8 @@ select count(*) from srcpart join srcpart_date on abs(negative(cast(concat(cast( EXPLAIN select count(*) from srcpart join srcpart_date on cast(day(srcpart.ds) as smallint) = cast(day(srcpart_date.ds) as decimal) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on cast(day(srcpart.ds) as smallint) = cast(day(srcpart_date.ds) as decimal) where srcpart_date.`date` = '2008-04-08'; --- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +-- multiple sources, single key -- filter the partitioned table on both partition columns via joins with two non-partitioned tables +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; @@ -54,7 +54,7 @@ where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; --- multiple columns single source +-- multiple columns single source -- filter the partitioned table on both partition columns via a join with a single non-partitioned table, with a filter on the non-partitioned table EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; set hive.spark.dynamic.partition.pruning=false; @@ -63,7 +63,7 @@ select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_dat set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; --- empty set +-- empty set -- join a partitioned table to a non-partitioned table, static filter on the non-partitioned table that matches no rows EXPLAIN select count(*) from srcpart join srcpart_date on
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.spark.dynamic.partition.pruning=false; @@ -72,7 +72,7 @@ select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where ds = 'I DONT EXIST'; --- expressions +-- expressions -- triggers DPP with various expressions - e.g. cast, multiplication, division EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; @@ -89,8 +89,7 @@ select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as s set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where cast(hr as string) = 11; - --- parent is reduce tasks +-- parent is reduce tasks -- join a partitioned table to a non-partitioned table, where the non-partitioned table is a subquery, static filter on the subquery EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; @@ -155,8 +154,7 @@ select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; --- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; -- expressions EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; @@ -170,10 +168,6 @@ EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from sr select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; --- single column, single key, udf with typechange -EXPLAIN select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08'; -select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08'; - -- left join EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; @@ -188,16 +182,26 @@ select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 
11; EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; --- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) --- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +-- different file format +create table srcpart_parquet (key int, value string) partitioned by (ds string, hr int) stored as parquet; + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=false; +set hive.exec.max.dynamic.partitions=1000; + +insert into table srcpart_parquet partition (ds, hr) select key, value, ds, hr from srcpart; +EXPLAIN select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11; -drop table srcpart_orc; +drop table srcpart_parquet; drop table srcpart_date; drop table srcpart_hour; drop table srcpart_date_hour; diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q index ac4ff934e7..8863cf4461 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q @@ -1,16 +1,228 @@ -set hive.spark.dynamic.partition.pruning=true; -set hive.auto.convert.join=true; +CREATE TABLE partitioned_table1 (col int) PARTITIONED BY (part_col int); +CREATE TABLE partitioned_table2 (col int) PARTITIONED BY (part_col int); +CREATE TABLE partitioned_table3 (col int) PARTITIONED BY (part_col int); +CREATE TABLE partitioned_table4 (col int) PARTITIONED BY (part_col1 int, part_col2 int); +CREATE TABLE partitioned_table5 (col int) PARTITIONED BY (part_col1 int, part_col2 int); -create table partitioned_table1 (col int) partitioned by (part_col int); -create table partitioned_table2 (col int) partitioned by (part_col int); -create table regular_table (col int); -insert into table regular_table values (1); +CREATE TABLE regular_table1 (col1 int, col2 int); +CREATE TABLE regular_table2 (col1 int, col2 int); -alter table partitioned_table1 add partition (part_col = 1); -insert into table partitioned_table1 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 1); +ALTER TABLE 
partitioned_table1 ADD PARTITION (part_col = 2); +ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 3); -alter table partitioned_table2 add partition (part_col = 1); -insert into table partitioned_table2 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 1); +ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 2); +ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 3); -explain select * from partitioned_table1, partitioned_table2 where partitioned_table1.part_col = partitioned_table2.part_col; -explain select * from partitioned_table1 where partitioned_table1.part_col in (select regular_table.col from regular_table join partitioned_table2 on regular_table.col = partitioned_table2.part_col); \ No newline at end of file +ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 1); +ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 2); +ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 3); + +ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 1, part_col2 = 1); +ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 2, part_col2 = 2); +ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 3, part_col2 = 3); + +ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 1, part_col2 = 1); +ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 2, part_col2 = 2); +ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 3, part_col2 = 3); + +INSERT INTO TABLE regular_table1 VALUES (0, 0), (1, 1), (2, 2); +INSERT INTO TABLE regular_table2 VALUES (0, 0), (1, 1), (2, 2); + +INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), (3), (4), (5), (6); + +SET hive.spark.dynamic.partition.pruning.map.join.only=true; +SET hive.strict.checks.cartesian.product=false; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask.size=15; -- ensure the partitioned tables are treated as big tables + +-- left semi join where the subquery is a join between a 
partitioned and a non-partitioned table +EXPLAIN SELECT count(*) FROM partitioned_table1 WHERE partitioned_table1.part_col IN ( +SELECT regular_table1.col1 FROM regular_table1 JOIN partitioned_table2 ON +regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1); + +SELECT count(*) +FROM partitioned_table1 +WHERE partitioned_table1.part_col IN (SELECT regular_table1.col1 + FROM regular_table1 + JOIN partitioned_table2 + ON + regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1); + +-- three-table join with dpp against one partitioned table +EXPLAIN SELECT count(*) FROM partitioned_table1, regular_table1 rt1, +regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table1.part_col AND rt1.col2 > 0 AND rt2.col2 > 1; + +SELECT count(*) +FROM partitioned_table1, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table1.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1; + +-- four-table join with dpp against two separate partitioned tables +EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2, regular_table1 rt1 +, regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table2.part_col AND rt1.col2 > 0 AND rt2.col2 > 1; + +SELECT count(*) +FROM partitioned_table1, + partitioned_table2, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table2.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1; + +-- dpp with between filter +EXPLAIN SELECT count(*) FROM regular_table1, partitioned_table1 WHERE regular_table1.col1 += partitioned_table1.part_col AND regular_table1.col2 BETWEEN 1 AND 3; + +SELECT count(*) +FROM regular_table1, + partitioned_table1 +WHERE regular_table1.col1 = partitioned_table1.part_col + AND regular_table1.col2 BETWEEN 1 AND 3; + +-- dpp with cte +EXPLAIN WITH q1 AS (SELECT regular_table1.col1 AS col FROM regular_table1 WHERE +regular_table1.col2 > 1), q2 AS (SELECT partitioned_table1.part_col AS col FROM +partitioned_table1 WHERE partitioned_table1.col > 1) SELECT count(*) FROM q1 JOIN q2 ON +q1.col = q2.col; + +WITH q1 + AS (SELECT regular_table1.col1 AS col + FROM regular_table1 + WHERE regular_table1.col2 > 1), + q2 + AS (SELECT partitioned_table1.part_col AS col + FROM partitioned_table1 + WHERE partitioned_table1.col > 1) +SELECT count(*) +FROM q1 + JOIN q2 + ON q1.col = q2.col; + +-- join two partitioned tables with a filter +EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2 WHERE +partitioned_table1.part_col = partitioned_table2.part_col AND partitioned_table2.col > 1; + +SELECT count(*) +FROM partitioned_table1, + partitioned_table2 +WHERE partitioned_table1.part_col = partitioned_table2.part_col + AND partitioned_table2.col > 1; + +-- dpp between two partitioned tables, both with multiple partition columns +SET hive.auto.convert.join.noconditionaltask.size=150; -- set auto convert size to a higher value so map-joins are triggered for the partitioned tables + +EXPLAIN SELECT count(*) FROM partitioned_table4, partitioned_table5 WHERE +partitioned_table4.part_col1 = partitioned_table5.part_col1 AND +partitioned_table4.part_col2 = partitioned_table5.part_col2; + +SELECT count(*) +FROM partitioned_table4, + partitioned_table5 +WHERE partitioned_table4.part_col1 = partitioned_table5.part_col1 + AND partitioned_table4.part_col2 =
partitioned_table5.part_col2; + +-- dpp is pushed through multiple levels of joins +EXPLAIN SELECT count(*) FROM partitioned_table1 JOIN regular_table1 ON +partitioned_table1.part_col = regular_table1.col1 JOIN regular_table2 ON +regular_table1.col1 = regular_table2.col1; + +SELECT count(*) +FROM partitioned_table1 + JOIN regular_table1 + ON partitioned_table1.part_col = regular_table1.col1 + JOIN regular_table2 + ON regular_table1.col1 = regular_table2.col1; + +SET hive.auto.convert.join.noconditionaltask.size=15; -- reset auto convert size to previous value + +-- three-way join where the partitioned table is the smallest table +-- disabled until HIVE-17225 is fixed +-- EXPLAIN SELECT * FROM partitioned_table1, regular_table1 rt1, regular_table2 rt2 +-- WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +-- partitioned_table1.part_col AND partitioned_table1.col > 3; + +-- SELECT * +-- FROM partitioned_table1, +-- regular_table1 rt1, +-- regular_table2 rt2 +-- WHERE rt1.col1 = partitioned_table1.part_col +-- AND rt2.col1 = partitioned_table1.part_col AND partitioned_table1.col > 3; + +-- dpp is pushed to partition columns that are added to each other +-- disabled until HIVE-17244 is fixed +-- EXPLAIN SELECT count(*) FROM partitioned_table4 pt4 JOIN regular_table1 rt1 +-- ON pt4.part_col1 + pt4.part_col2 = rt1.col1 + 1; + +-- SELECT count(*) +-- FROM partitioned_table4 pt4 +-- JOIN regular_table1 rt1 +-- ON pt4.part_col1 + pt4.part_col2 = rt1.col1 + 1; + +-- dpp pushed to all union operands +-- disabled until HIVE-17239 is fixed +-- EXPLAIN SELECT count(*) FROM (SELECT part_col FROM partitioned_table1 UNION ALL SELECT +-- part_col FROM partitioned_table2) q1 JOIN regular_table1 JOIN regular_table2 +-- WHERE q1.part_col = regular_table1.col1 AND q1.part_col = regular_table2.col1; + +-- SELECT count(*) +-- FROM ( +-- SELECT part_col +-- FROM partitioned_table1 +-- UNION ALL +-- SELECT part_col +-- FROM partitioned_table2) q1 +-- JOIN regular_table1 +-- JOIN regular_table2 +-- WHERE q1.part_col = regular_table1.col1 +-- AND q1.part_col = regular_table2.col1; + +-- left semi join where the subquery is a join between two partitioned tables +-- disabled until HIVE-17238 is fixed +-- EXPLAIN SELECT count(*) FROM partitioned_table1 WHERE partitioned_table1.part_col IN ( +-- SELECT partitioned_table2.col FROM partitioned_table2 JOIN partitioned_table3 ON +-- partitioned_table3.col = partitioned_table2.part_col AND partitioned_table2.col > 1); + +-- SELECT count(*) +-- FROM partitioned_table1 +-- WHERE partitioned_table1.part_col IN (SELECT partitioned_table2.col +-- FROM partitioned_table2 +-- JOIN partitioned_table3 +-- ON +-- partitioned_table3.col = partitioned_table2.part_col +-- AND partitioned_table2.col > 1); + +DROP TABLE partitioned_table1; +DROP TABLE partitioned_table2; +DROP TABLE partitioned_table3; +DROP TABLE partitioned_table4; +DROP TABLE partitioned_table5; +DROP TABLE regular_table1; +DROP TABLE regular_table2; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out index 63a9548da5..4cd51f79a7 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out @@ -48,7 +48,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 4) + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was
here #### Vertices: Map 1 @@ -235,7 +235,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -342,7 +342,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -491,7 +491,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -598,7 +598,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -738,7 +738,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -881,7 +881,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -976,10 +976,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -1050,8 +1050,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1197,8 +1197,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL 
SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1399,7 +1399,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1505,7 +1505,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1654,7 +1654,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1761,7 +1761,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1910,7 +1910,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2050,7 +2050,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2157,7 +2157,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2264,7 +2264,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2413,7 +2413,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2532,7 +2532,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 4) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2809,7 +2809,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2948,7 +2948,7 @@ STAGE PLANS: Stage: 
Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -3069,7 +3069,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -3189,7 +3189,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -3338,8 +3338,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -3481,8 +3481,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -3725,7 +3725,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) Reducer 5 <- Map 4 (GROUP, 1) Reducer 7 <- Map 6 (GROUP, 1) @@ -3993,8 +3993,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Reducer 2 (GROUP, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) Reducer 5 <- Map 4 (GROUP, 1) Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### @@ -4264,8 +4264,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 4) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4), Reducer 9 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) Reducer 7 <- Map 6 (GROUP, 1) Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### @@ -5107,6 +5107,25 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Input: default@srcpart_date +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Input: default@srcpart_date +#### A masked pattern was here #### +0 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 @@ -5378,7 +5397,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 4) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 3 @@ -5493,130 +5512,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 day(_col0) (type: int) - 1 day(_col0) (type: int) - Select Operator - expressions: day(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target column: ds (string) - partition key expr: day(ds) - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - target work: Map 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: 
Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 day(_col0) (type: int) - 1 day(_col0) (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Input: default@srcpart_date -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Input: default@srcpart_date -#### A masked pattern was here #### -1000 PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' @@ -6201,6 +6096,21 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_hour +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_hour +#### A masked pattern was here #### +0 PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from 
srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) @@ -6316,8 +6226,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Reducer 2 (GROUP, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) Reducer 5 <- Map 4 (GROUP, 1) Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### @@ -6472,10 +6382,195 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2008-04-08 2008-04-09 -PREHOOK: query: drop table srcpart_orc +PREHOOK: query: create table srcpart_parquet (key int, value string) partitioned by (ds string, hr int) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_parquet +POSTHOOK: query: create table srcpart_parquet (key int, value string) partitioned by (ds string, hr int) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_parquet +PREHOOK: query: insert into table srcpart_parquet partition (ds, hr) select key, value, ds, hr from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_parquet +POSTHOOK: query: insert into table srcpart_parquet partition (ds, hr) select key, value, ds, hr from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet 
PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_date_hour + filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string), UDFToDouble(_col1) (type: double) + 1 _col0 (type: string), UDFToDouble(_col2) (type: double) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Select Operator + expressions: UDFToDouble(_col2) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: hr (int) + partition key expr: UDFToDouble(hr) + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_parquet + Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string), hr (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), UDFToDouble(_col1) 
(type: double) + 1 _col0 (type: string), UDFToDouble(_col2) (type: double) + input vertices: + 1 Map 3 + Statistics: Num rows: 2200 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date_hour +PREHOOK: Input: default@srcpart_parquet +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date_hour +POSTHOOK: Input: default@srcpart_parquet +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +#### A masked pattern was here #### +1000 +PREHOOK: query: drop table srcpart_parquet PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table srcpart_orc +PREHOOK: Input: default@srcpart_parquet +PREHOOK: Output: default@srcpart_parquet +POSTHOOK: query: drop table srcpart_parquet POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_parquet +POSTHOOK: Output: default@srcpart_parquet PREHOOK: query: drop table srcpart_date PREHOOK: type: DROPTABLE PREHOOK: Input: default@srcpart_date diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out 
b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out index 82e746a862..3ae922f6e1 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out @@ -1,65 +1,1307 @@ -PREHOOK: query: create table partitioned_table1 (col int) partitioned by (part_col int) +PREHOOK: query: CREATE TABLE partitioned_table1 (col int) PARTITIONED BY (part_col int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@partitioned_table1 -POSTHOOK: query: create table partitioned_table1 (col int) partitioned by (part_col int) +POSTHOOK: query: CREATE TABLE partitioned_table1 (col int) PARTITIONED BY (part_col int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@partitioned_table1 -PREHOOK: query: create table partitioned_table2 (col int) partitioned by (part_col int) +PREHOOK: query: CREATE TABLE partitioned_table2 (col int) PARTITIONED BY (part_col int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@partitioned_table2 -POSTHOOK: query: create table partitioned_table2 (col int) partitioned by (part_col int) +POSTHOOK: query: CREATE TABLE partitioned_table2 (col int) PARTITIONED BY (part_col int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@partitioned_table2 -PREHOOK: query: create table regular_table (col int) +PREHOOK: query: CREATE TABLE partitioned_table3 (col int) PARTITIONED BY (part_col int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@regular_table -POSTHOOK: query: create table regular_table (col int) +PREHOOK: Output: default@partitioned_table3 +POSTHOOK: query: CREATE TABLE partitioned_table3 (col int) PARTITIONED BY (part_col int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@regular_table -PREHOOK: query: insert into table regular_table values (1) -PREHOOK: type: QUERY -PREHOOK: Output: default@regular_table -POSTHOOK: query: insert into table regular_table values (1) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@regular_table -POSTHOOK: Lineage: regular_table.col EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: alter table partitioned_table1 add partition (part_col = 1) +POSTHOOK: Output: default@partitioned_table3 +PREHOOK: query: CREATE TABLE partitioned_table4 (col int) PARTITIONED BY (part_col1 int, part_col2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned_table4 +POSTHOOK: query: CREATE TABLE partitioned_table4 (col int) PARTITIONED BY (part_col1 int, part_col2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned_table4 +PREHOOK: query: CREATE TABLE partitioned_table5 (col int) PARTITIONED BY (part_col1 int, part_col2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned_table5 +POSTHOOK: query: CREATE TABLE partitioned_table5 (col int) PARTITIONED BY (part_col1 int, part_col2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned_table5 +PREHOOK: query: CREATE TABLE regular_table1 (col1 int, col2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@regular_table1 
+POSTHOOK: query: CREATE TABLE regular_table1 (col1 int, col2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@regular_table1 +PREHOOK: query: CREATE TABLE regular_table2 (col1 int, col2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@regular_table2 +POSTHOOK: query: CREATE TABLE regular_table2 (col1 int, col2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@regular_table2 +PREHOOK: query: ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 1) PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Output: default@partitioned_table1 -POSTHOOK: query: alter table partitioned_table1 add partition (part_col = 1) +POSTHOOK: query: ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 1) POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Output: default@partitioned_table1 POSTHOOK: Output: default@partitioned_table1@part_col=1 -PREHOOK: query: insert into table partitioned_table1 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10) -PREHOOK: type: QUERY -PREHOOK: Output: default@partitioned_table1@part_col=1 -POSTHOOK: query: insert into table partitioned_table1 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@partitioned_table1@part_col=1 -POSTHOOK: Lineage: partitioned_table1 PARTITION(part_col=1).col EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: alter table partitioned_table2 add partition (part_col = 1) +PREHOOK: query: ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 2) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table1 +POSTHOOK: query: ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 2) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table1 +POSTHOOK: Output: default@partitioned_table1@part_col=2 +PREHOOK: query: ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 3) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table1 +POSTHOOK: query: ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 3) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table1 +POSTHOOK: Output: default@partitioned_table1@part_col=3 +PREHOOK: query: ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 1) PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Output: default@partitioned_table2 -POSTHOOK: query: alter table partitioned_table2 add partition (part_col = 1) +POSTHOOK: query: ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 1) POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Output: default@partitioned_table2 POSTHOOK: Output: default@partitioned_table2@part_col=1 -PREHOOK: query: insert into table partitioned_table2 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10) +PREHOOK: query: ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 2) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table2 +POSTHOOK: query: ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 2) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table2 +POSTHOOK: Output: default@partitioned_table2@part_col=2 +PREHOOK: query: ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 3) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table2 +POSTHOOK: query: ALTER 
TABLE partitioned_table2 ADD PARTITION (part_col = 3) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table2 +POSTHOOK: Output: default@partitioned_table2@part_col=3 +PREHOOK: query: ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 1) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table3 +POSTHOOK: query: ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 1) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table3 +POSTHOOK: Output: default@partitioned_table3@part_col=1 +PREHOOK: query: ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 2) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table3 +POSTHOOK: query: ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 2) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table3 +POSTHOOK: Output: default@partitioned_table3@part_col=2 +PREHOOK: query: ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 3) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table3 +POSTHOOK: query: ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 3) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table3 +POSTHOOK: Output: default@partitioned_table3@part_col=3 +PREHOOK: query: ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 1, part_col2 = 1) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table4 +POSTHOOK: query: ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 1, part_col2 = 1) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table4 +POSTHOOK: Output: default@partitioned_table4@part_col1=1/part_col2=1 +PREHOOK: query: ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 2, part_col2 = 2) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table4 +POSTHOOK: query: ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 2, part_col2 = 2) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table4 +POSTHOOK: Output: default@partitioned_table4@part_col1=2/part_col2=2 +PREHOOK: query: ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 3, part_col2 = 3) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table4 +POSTHOOK: query: ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 3, part_col2 = 3) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table4 +POSTHOOK: Output: default@partitioned_table4@part_col1=3/part_col2=3 +PREHOOK: query: ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 1, part_col2 = 1) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table5 +POSTHOOK: query: ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 1, part_col2 = 1) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table5 +POSTHOOK: Output: default@partitioned_table5@part_col1=1/part_col2=1 +PREHOOK: query: ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 2, part_col2 = 2) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table5 +POSTHOOK: query: ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 2, part_col2 = 2) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table5 +POSTHOOK: Output: default@partitioned_table5@part_col1=2/part_col2=2 +PREHOOK: query: ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 3, part_col2 = 3) +PREHOOK: type: 
ALTERTABLE_ADDPARTS +PREHOOK: Output: default@partitioned_table5 +POSTHOOK: query: ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 3, part_col2 = 3) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@partitioned_table5 +POSTHOOK: Output: default@partitioned_table5@part_col1=3/part_col2=3 +PREHOOK: query: INSERT INTO TABLE regular_table1 VALUES (0, 0), (1, 1), (2, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@regular_table1 +POSTHOOK: query: INSERT INTO TABLE regular_table1 VALUES (0, 0), (1, 1), (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@regular_table1 +POSTHOOK: Lineage: regular_table1.col1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: regular_table1.col2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE regular_table2 VALUES (0, 0), (1, 1), (2, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@regular_table2 +POSTHOOK: query: INSERT INTO TABLE regular_table2 VALUES (0, 0), (1, 1), (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@regular_table2 +POSTHOOK: Lineage: regular_table2.col1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: regular_table2.col2 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table1@part_col=1 +POSTHOOK: query: INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table1@part_col=1 +POSTHOOK: Lineage: partitioned_table1 PARTITION(part_col=1).col EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table1@part_col=2 +POSTHOOK: query: INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table1@part_col=2 +POSTHOOK: Lineage: partitioned_table1 PARTITION(part_col=2).col EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table1@part_col=3 +POSTHOOK: query: INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table1@part_col=3 +POSTHOOK: Lineage: partitioned_table1 PARTITION(part_col=3).col EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table2@part_col=1 +POSTHOOK: query: INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: 
type: QUERY +POSTHOOK: Output: default@partitioned_table2@part_col=1 +POSTHOOK: Lineage: partitioned_table2 PARTITION(part_col=1).col EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table2@part_col=2 +POSTHOOK: query: INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table2@part_col=2 +POSTHOOK: Lineage: partitioned_table2 PARTITION(part_col=2).col EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table2@part_col=3 +POSTHOOK: query: INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table2@part_col=3 +POSTHOOK: Lineage: partitioned_table2 PARTITION(part_col=3).col EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table3@part_col=1 +POSTHOOK: query: INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table3@part_col=1 +POSTHOOK: Lineage: partitioned_table3 PARTITION(part_col=1).col EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table3@part_col=2 +POSTHOOK: query: INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table3@part_col=2 +POSTHOOK: Lineage: partitioned_table3 PARTITION(part_col=2).col EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table3@part_col=3 +POSTHOOK: query: INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table3@part_col=3 +POSTHOOK: Lineage: partitioned_table3 PARTITION(part_col=3).col EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table4@part_col1=1/part_col2=1 +POSTHOOK: query: INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: 
default@partitioned_table4@part_col1=1/part_col2=1 +POSTHOOK: Lineage: partitioned_table4 PARTITION(part_col1=1,part_col2=1).col EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table4@part_col1=2/part_col2=2 +POSTHOOK: query: INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table4@part_col1=2/part_col2=2 +POSTHOOK: Lineage: partitioned_table4 PARTITION(part_col1=2,part_col2=2).col EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table4@part_col1=3/part_col2=3 +POSTHOOK: query: INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table4@part_col1=3/part_col2=3 +POSTHOOK: Lineage: partitioned_table4 PARTITION(part_col1=3,part_col2=3).col EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table5@part_col1=1/part_col2=1 +POSTHOOK: query: INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table5@part_col1=1/part_col2=1 +POSTHOOK: Lineage: partitioned_table5 PARTITION(part_col1=1,part_col2=1).col EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table5@part_col1=2/part_col2=2 +POSTHOOK: query: INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table5@part_col1=2/part_col2=2 +POSTHOOK: Lineage: partitioned_table5 PARTITION(part_col1=2,part_col2=2).col EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), (3), (4), (5), (6) +PREHOOK: type: QUERY +PREHOOK: Output: default@partitioned_table5@part_col1=3/part_col2=3 +POSTHOOK: query: INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), (3), (4), (5), (6) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@partitioned_table5@part_col1=3/part_col2=3 +POSTHOOK: Lineage: partitioned_table5 PARTITION(part_col1=3,part_col2=3).col EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- ensure the partitioned tables are treated as big tables + + 
+EXPLAIN SELECT count(*) FROM partitioned_table1 WHERE partitioned_table1.part_col IN ( +SELECT regular_table1.col1 FROM regular_table1 JOIN partitioned_table2 ON +regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1) +PREHOOK: type: QUERY +POSTHOOK: query: -- ensure the partitioned tables are treated as big tables + + +EXPLAIN SELECT count(*) FROM partitioned_table1 WHERE partitioned_table1.part_col IN ( +SELECT regular_table1.col1 FROM regular_table1 JOIN partitioned_table2 ON +regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: regular_table1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col1 > 1) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target work: Map 4 + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: partitioned_table2 + Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col > 3) (type: boolean) + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0 + input vertices: + 0 Map 3 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + target work: 
Map 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: partitioned_table1 + Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(*) +FROM partitioned_table1 +WHERE partitioned_table1.part_col IN (SELECT regular_table1.col1 + FROM regular_table1 + JOIN partitioned_table2 + ON + regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Input: default@partitioned_table1@part_col=2 +PREHOOK: Input: default@partitioned_table1@part_col=3 +PREHOOK: Input: default@partitioned_table2 +PREHOOK: Input: default@partitioned_table2@part_col=2 +PREHOOK: Input: default@partitioned_table2@part_col=3 +PREHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM partitioned_table1 +WHERE partitioned_table1.part_col IN (SELECT regular_table1.col1 + FROM regular_table1 + JOIN partitioned_table2 + ON + regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Input: default@partitioned_table1@part_col=2 +POSTHOOK: Input: default@partitioned_table1@part_col=3 +POSTHOOK: Input: default@partitioned_table2 +POSTHOOK: Input: default@partitioned_table2@part_col=2 +POSTHOOK: Input: default@partitioned_table2@part_col=3 +POSTHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +6 +PREHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1, regular_table1 rt1, +regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table1.part_col AND rt1.col2 > 0 AND rt2.col2 > 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1, regular_table1 rt1, +regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table1.part_col AND rt1.col2 
> 0 AND rt2.col2 > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: rt1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((col2 > 0) and col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Local Work: + Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: rt2 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((col2 > 1) and col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: partitioned_table1 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 39 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map 
Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(*) +FROM partitioned_table1, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table1.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Input: default@partitioned_table1@part_col=1 +PREHOOK: Input: default@partitioned_table1@part_col=2 +PREHOOK: Input: default@partitioned_table1@part_col=3 +PREHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM partitioned_table1, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table1.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Input: default@partitioned_table1@part_col=1 +POSTHOOK: Input: default@partitioned_table1@part_col=2 +POSTHOOK: Input: default@partitioned_table1@part_col=3 +POSTHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +6 +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2, regular_table1 rt1 +, regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table2.part_col AND rt1.col2 > 0 AND rt2.col2 > 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2, regular_table1 rt1 +, regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table2.part_col AND rt1.col2 > 0 AND rt2.col2 > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: rt1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((col2 > 0) and col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: 
part_col + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Local Work: + Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: rt2 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((col2 > 1) and col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target work: Map 4 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: partitioned_table1 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: partitioned_table2 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 324 Data size: 972 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 356 Data size: 1069 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 6 + Statistics: Num rows: 391 Data size: 1175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: 
mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: SELECT count(*) +FROM partitioned_table1, + partitioned_table2, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table2.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Input: default@partitioned_table1@part_col=1 +PREHOOK: Input: default@partitioned_table1@part_col=2 +PREHOOK: Input: default@partitioned_table1@part_col=3 +PREHOOK: Input: default@partitioned_table2 +PREHOOK: Input: default@partitioned_table2@part_col=1 +PREHOOK: Input: default@partitioned_table2@part_col=2 +PREHOOK: Input: default@partitioned_table2@part_col=3 +PREHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM partitioned_table1, + partitioned_table2, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table2.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Input: default@partitioned_table1@part_col=1 +POSTHOOK: Input: default@partitioned_table1@part_col=2 +POSTHOOK: Input: default@partitioned_table1@part_col=3 +POSTHOOK: Input: default@partitioned_table2 +POSTHOOK: Input: default@partitioned_table2@part_col=1 +POSTHOOK: Input: default@partitioned_table2@part_col=2 +POSTHOOK: Input: default@partitioned_table2@part_col=3 +POSTHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +72 +PREHOOK: query: EXPLAIN SELECT count(*) FROM regular_table1, partitioned_table1 WHERE regular_table1.col1 += partitioned_table1.part_col AND regular_table1.col2 BETWEEN 1 AND 3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT count(*) FROM regular_table1, partitioned_table1 WHERE regular_table1.col1 += partitioned_table1.part_col AND regular_table1.col2 BETWEEN 1 AND 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: regular_table1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col2 BETWEEN 1 AND 3 and col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: 
int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target work: Map 2 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: partitioned_table1 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(*) +FROM regular_table1, + partitioned_table1 +WHERE regular_table1.col1 = partitioned_table1.part_col + AND regular_table1.col2 BETWEEN 1 AND 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Input: default@partitioned_table1@part_col=1 +PREHOOK: Input: default@partitioned_table1@part_col=2 +PREHOOK: Input: default@partitioned_table1@part_col=3 +PREHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM regular_table1, + partitioned_table1 +WHERE regular_table1.col1 = partitioned_table1.part_col + AND regular_table1.col2 BETWEEN 1 AND 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Input: default@partitioned_table1@part_col=1 +POSTHOOK: Input: default@partitioned_table1@part_col=2 +POSTHOOK: Input: default@partitioned_table1@part_col=3 +POSTHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +12 +PREHOOK: query: EXPLAIN WITH q1 AS (SELECT regular_table1.col1 AS col FROM regular_table1 WHERE +regular_table1.col2 > 1), q2 AS (SELECT partitioned_table1.part_col AS col FROM +partitioned_table1 WHERE partitioned_table1.col > 1) SELECT count(*) FROM q1 JOIN q2 ON +q1.col = q2.col +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN WITH q1 AS (SELECT regular_table1.col1 AS col FROM regular_table1 WHERE +regular_table1.col2 > 1), q2 AS (SELECT partitioned_table1.part_col AS col FROM +partitioned_table1 WHERE partitioned_table1.col > 1) SELECT count(*) FROM q1 JOIN 
q2 ON +q1.col = q2.col +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: regular_table1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((col2 > 1) and col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target work: Map 2 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: partitioned_table1 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col > 1) (type: boolean) + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: WITH q1 + AS (SELECT regular_table1.col1 AS col + FROM regular_table1 + WHERE regular_table1.col2 > 1), + q2 + AS (SELECT partitioned_table1.part_col AS col + FROM partitioned_table1 + WHERE partitioned_table1.col > 1) +SELECT count(*) +FROM q1 + JOIN q2 + ON q1.col = q2.col +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Input: 
default@partitioned_table1@part_col=1 +PREHOOK: Input: default@partitioned_table1@part_col=2 +PREHOOK: Input: default@partitioned_table1@part_col=3 +PREHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +POSTHOOK: query: WITH q1 + AS (SELECT regular_table1.col1 AS col + FROM regular_table1 + WHERE regular_table1.col2 > 1), + q2 + AS (SELECT partitioned_table1.part_col AS col + FROM partitioned_table1 + WHERE partitioned_table1.col > 1) +SELECT count(*) +FROM q1 + JOIN q2 + ON q1.col = q2.col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Input: default@partitioned_table1@part_col=1 +POSTHOOK: Input: default@partitioned_table1@part_col=2 +POSTHOOK: Input: default@partitioned_table1@part_col=3 +POSTHOOK: Input: default@regular_table1 +#### A masked pattern was here #### +5 +PREHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2 WHERE +partitioned_table1.part_col = partitioned_table2.part_col AND partitioned_table2.col > 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2 WHERE +partitioned_table1.part_col = partitioned_table2.part_col AND partitioned_table2.col > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: partitioned_table2 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col > 1) (type: boolean) + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: partitioned_table1 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map 
Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(*) +FROM partitioned_table1, + partitioned_table2 +WHERE partitioned_table1.part_col = partitioned_table2.part_col + AND partitioned_table2.col > 1 PREHOOK: type: QUERY -PREHOOK: Output: default@partitioned_table2@part_col=1 -POSTHOOK: query: insert into table partitioned_table2 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10) +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Input: default@partitioned_table1@part_col=1 +PREHOOK: Input: default@partitioned_table1@part_col=2 +PREHOOK: Input: default@partitioned_table1@part_col=3 +PREHOOK: Input: default@partitioned_table2 +PREHOOK: Input: default@partitioned_table2@part_col=1 +PREHOOK: Input: default@partitioned_table2@part_col=2 +PREHOOK: Input: default@partitioned_table2@part_col=3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM partitioned_table1, + partitioned_table2 +WHERE partitioned_table1.part_col = partitioned_table2.part_col + AND partitioned_table2.col > 1 POSTHOOK: type: QUERY -POSTHOOK: Output: default@partitioned_table2@part_col=1 -POSTHOOK: Lineage: partitioned_table2 PARTITION(part_col=1).col EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain select * from partitioned_table1, partitioned_table2 where partitioned_table1.part_col = partitioned_table2.part_col +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Input: default@partitioned_table1@part_col=1 +POSTHOOK: Input: default@partitioned_table1@part_col=2 +POSTHOOK: Input: default@partitioned_table1@part_col=3 +POSTHOOK: Input: default@partitioned_table2 +POSTHOOK: Input: default@partitioned_table2@part_col=1 +POSTHOOK: Input: default@partitioned_table2@part_col=2 +POSTHOOK: Input: default@partitioned_table2@part_col=3 +#### A masked pattern was here #### +90 +PREHOOK: query: -- set auto convert size to a higher value so map-joins are triggered for the partitioned tables + +EXPLAIN SELECT count(*) FROM partitioned_table4, partitioned_table5 WHERE +partitioned_table4.part_col1 = partitioned_table5.part_col1 AND +partitioned_table4.part_col2 = partitioned_table5.part_col2 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from partitioned_table1, partitioned_table2 where partitioned_table1.part_col = partitioned_table2.part_col +POSTHOOK: query: -- set auto convert size to a higher value so map-joins are triggered for the partitioned tables + +EXPLAIN SELECT count(*) FROM partitioned_table4, partitioned_table5 WHERE +partitioned_table4.part_col1 = partitioned_table5.part_col1 AND +partitioned_table4.part_col2 = partitioned_table5.part_col2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -71,68 +1313,99 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan - alias: 
partitioned_table2 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + alias: partitioned_table5 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part_col (type: int) + expressions: part_col1 (type: int), part_col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col1 (int) + partition key expr: part_col1 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator - Target column: part_col (int) - partition key expr: part_col - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Target column: part_col2 (int) + partition key expr: part_col2 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE target work: Map 1 Local Work: Map Reduce Local Work Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: partitioned_table1 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + alias: partitioned_table4 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part_col (type: int) + expressions: part_col1 (type: int), part_col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 3 + Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -140,135 +1413,174 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select * from partitioned_table1 where partitioned_table1.part_col in (select regular_table.col from regular_table join partitioned_table2 on regular_table.col = partitioned_table2.part_col) +PREHOOK: query: SELECT count(*) +FROM partitioned_table4, + partitioned_table5 +WHERE partitioned_table4.part_col1 = partitioned_table5.part_col1 + AND partitioned_table4.part_col2 = partitioned_table5.part_col2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned_table4 +PREHOOK: Input: default@partitioned_table4@part_col1=1/part_col2=1 +PREHOOK: Input: default@partitioned_table4@part_col1=2/part_col2=2 +PREHOOK: Input: default@partitioned_table4@part_col1=3/part_col2=3 +PREHOOK: Input: default@partitioned_table5 +PREHOOK: Input: default@partitioned_table5@part_col1=1/part_col2=1 +PREHOOK: Input: default@partitioned_table5@part_col1=2/part_col2=2 +PREHOOK: Input: default@partitioned_table5@part_col1=3/part_col2=3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM partitioned_table4, + partitioned_table5 +WHERE partitioned_table4.part_col1 = partitioned_table5.part_col1 + AND partitioned_table4.part_col2 = partitioned_table5.part_col2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned_table4 +POSTHOOK: Input: default@partitioned_table4@part_col1=1/part_col2=1 +POSTHOOK: Input: default@partitioned_table4@part_col1=2/part_col2=2 +POSTHOOK: Input: default@partitioned_table4@part_col1=3/part_col2=3 +POSTHOOK: Input: default@partitioned_table5 +POSTHOOK: Input: default@partitioned_table5@part_col1=1/part_col2=1 +POSTHOOK: Input: default@partitioned_table5@part_col1=2/part_col2=2 +POSTHOOK: Input: default@partitioned_table5@part_col1=3/part_col2=3 +#### A masked pattern was here #### +108 +PREHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1 JOIN regular_table1 ON +partitioned_table1.part_col = regular_table1.col1 JOIN regular_table2 ON +regular_table1.col1 = regular_table2.col1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from partitioned_table1 where partitioned_table1.part_col in (select regular_table.col from regular_table join partitioned_table2 on regular_table.col = partitioned_table2.part_col) +POSTHOOK: query: EXPLAIN SELECT count(*) FROM partitioned_table1 JOIN regular_table1 ON +partitioned_table1.part_col = regular_table1.col1 JOIN regular_table2 ON +regular_table1.col1 = regular_table2.col1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-2 is a root stage 
Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan - alias: regular_table - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + alias: regular_table1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: col is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + predicate: col1 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int) + expressions: col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator Target column: part_col (int) partition key expr: part_col - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - target work: Map 3 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 Local Work: Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan - alias: partitioned_table2 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: part_col (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + alias: regular_table2 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: col1 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: int) outputColumnNames: _col0 - input vertices: - 0 Map 2 - Statistics: Num rows: 11 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Select Operator - expressions: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target column: part_col (int) - partition key expr: part_col - Statistics: Num rows: 11 Data size: 12 Basic stats: COMPLETE Column stats: NONE - target work: Map 1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: part_col (int) + partition key expr: part_col + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 Local Work: Map Reduce Local Work Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: partitioned_table1 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part_col (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 11 Basic stats: COMPLETE Column stats: NONE + expressions: part_col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 18 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 + Inner Join 1 to 2 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + 2 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 12 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 2 Map 4 + Statistics: Num rows: 39 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -276,3 +1588,204 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT count(*) +FROM partitioned_table1 + JOIN regular_table1 + ON partitioned_table1.part_col = regular_table1.col1 + JOIN regular_table2 + ON regular_table1.col1 = regular_table2.col1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Input: default@partitioned_table1@part_col=1 +PREHOOK: Input: default@partitioned_table1@part_col=2 +PREHOOK: Input: 
default@partitioned_table1@part_col=3 +PREHOOK: Input: default@regular_table1 +PREHOOK: Input: default@regular_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM partitioned_table1 + JOIN regular_table1 + ON partitioned_table1.part_col = regular_table1.col1 + JOIN regular_table2 + ON regular_table1.col1 = regular_table2.col1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Input: default@partitioned_table1@part_col=1 +POSTHOOK: Input: default@partitioned_table1@part_col=2 +POSTHOOK: Input: default@partitioned_table1@part_col=3 +POSTHOOK: Input: default@regular_table1 +POSTHOOK: Input: default@regular_table2 +#### A masked pattern was here #### +12 +PREHOOK: query: -- reset auto convert size to previous value + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +DROP TABLE partitioned_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned_table1 +PREHOOK: Output: default@partitioned_table1 +POSTHOOK: query: -- reset auto convert size to previous value + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +DROP TABLE partitioned_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned_table1 +POSTHOOK: Output: default@partitioned_table1 +PREHOOK: query: DROP TABLE partitioned_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned_table2 +PREHOOK: Output: default@partitioned_table2 +POSTHOOK: query: DROP TABLE partitioned_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned_table2 +POSTHOOK: Output: default@partitioned_table2 +PREHOOK: query: DROP TABLE partitioned_table3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned_table3 +PREHOOK: Output: default@partitioned_table3 +POSTHOOK: query: DROP TABLE partitioned_table3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned_table3 +POSTHOOK: Output: default@partitioned_table3 +PREHOOK: query: DROP TABLE partitioned_table4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned_table4 +PREHOOK: Output: default@partitioned_table4 +POSTHOOK: query: DROP TABLE partitioned_table4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned_table4 +POSTHOOK: Output: default@partitioned_table4 +PREHOOK: query: DROP TABLE partitioned_table5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned_table5 +PREHOOK: Output: default@partitioned_table5 +POSTHOOK: query: DROP TABLE partitioned_table5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned_table5 +POSTHOOK: Output: default@partitioned_table5 +PREHOOK: query: DROP TABLE regular_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@regular_table1 +PREHOOK: Output: default@regular_table1 +POSTHOOK: query: DROP TABLE regular_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@regular_table1 +POSTHOOK: Output: default@regular_table1 +PREHOOK: query: DROP TABLE regular_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@regular_table2 +PREHOOK: Output: default@regular_table2 +POSTHOOK: query: DROP TABLE regular_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@regular_table2 +POSTHOOK: Output: default@regular_table2