diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e251920d8b..5bdcac88d0 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2247,7 +2247,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION("hive.combine.equivalent.work.optimization", true, "Whether to " + "combine equivalent work objects during physical optimization.\n This optimization looks for equivalent " + "work objects and combines them if they meet certain preconditions. Spark only."), - HIVE_REMOVE_SQ_COUNT_CHECK("hive.optimize.remove.sq_count_check", false, + HIVE_REMOVE_SQ_COUNT_CHECK("hive.optimize.remove.sq_count_check", true, "Whether to remove an extra join with sq_count_check for scalar subqueries " + "with constant group by keys."), diff --git a/ql/src/test/queries/clientpositive/subquery_scalar.q b/ql/src/test/queries/clientpositive/subquery_scalar.q index 50b8ece399..a7322f5978 100644 --- a/ql/src/test/queries/clientpositive/subquery_scalar.q +++ b/ql/src/test/queries/clientpositive/subquery_scalar.q @@ -252,12 +252,9 @@ drop table t_n11; drop table tempty_n0; -- following queries shouldn't have a join with sq_count_check -set hive.optimize.remove.sq_count_check = true; explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 group by 4); explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ); -set hive.optimize.remove.sq_count_check = false; - diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index f7c8ec7e31..4423aec8a2 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -445,8 +445,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where (select i from tnull_n0 limit 1) is null PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where (select i from tnull_n0 limit 1) is null @@ -542,20 +542,24 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col10 is null (type: boolean) - Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col9 is null (type: boolean) + Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -608,8 +612,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where (select i from tnull_n0 limit 1) is null PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2395,8 +2399,8 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) @@ -2410,13 +2414,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 7 (SIMPLE_EDGE) Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 10 (XPROD_EDGE), Reducer 3 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2465,16 +2467,6 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: '90' (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: '90' (type: string) @@ -2489,23 +2481,6 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 10 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2553,13 +2528,11 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col1, _col3 - residual filter predicates: {(_col1 > _col3)} + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) @@ -2588,37 +2561,19 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) + expressions: _col1 (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2626,8 +2581,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/perf/spark/query44.q.out b/ql/src/test/results/clientpositive/perf/spark/query44.q.out index 6ba55e93a1..6410815a0a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query44.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join JOIN[36][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Work 'Reducer 8' is a cross product -Warning: Shuffle Join JOIN[81][tables = [$hdt$_4, $hdt$_5, $hdt$_3]] in Work 'Reducer 19' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 7' is a cross product +Warning: Shuffle Join JOIN[49][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 15' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -76,20 +76,18 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 100) - Reducer 13 <- Map 12 (GROUP, 199) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 1009), Reducer 20 (PARTITION-LEVEL SORT, 1009) - Reducer 17 <- Map 16 (GROUP, 100) - Reducer 18 <- Reducer 17 (GROUP, 1) - Reducer 19 <- Reducer 18 (PARTITION-LEVEL SORT, 1), Reducer 22 (PARTITION-LEVEL SORT, 1), Reducer 24 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) - Reducer 20 <- Reducer 19 (PARTITION-LEVEL SORT, 1009) - Reducer 22 <- Map 10 (GROUP, 100) - Reducer 24 <- Map 12 (GROUP, 199) - Reducer 3 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) + Reducer 10 <- Map 17 (GROUP, 100) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 1009), Reducer 16 (PARTITION-LEVEL SORT, 1009) + Reducer 14 <- Map 13 (GROUP, 199) + Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 18 (PARTITION-LEVEL SORT, 1) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 1009) + Reducer 18 <- Map 17 (GROUP, 100) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1009), Reducer 8 (PARTITION-LEVEL SORT, 1009) + Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 1), Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 18 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Reducer 8 (PARTITION-LEVEL SORT, 1009) + Reducer 6 <- Map 13 (GROUP, 199) + Reducer 7 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 8 <- Reducer 7 (PARTITION-LEVEL SORT, 1009) #### A masked pattern was here #### Vertices: Map 1 @@ -112,33 +110,27 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 10 + Map 11 Map Operator Tree: TableScan - alias: store_sales - filterExpr: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: i2 + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col1 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: 410 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) + expressions: i_item_sk (type: int), i_product_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: ss1 @@ -164,27 +156,7 @@ STAGE PLANS: Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 14 - Map Operator Tree: - TableScan - alias: i2 - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_product_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 16 + Map 17 Map Operator Tree: TableScan alias: store_sales @@ -194,19 +166,23 @@ STAGE PLANS: predicate: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col1 Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Group By Operator + aggregations: sum(_col1), count(_col1) keys: 410 (type: int) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Reducer 11 + Reducer 10 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -223,24 +199,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(37,22)) - Reducer 13 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) - Reducer 15 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -249,128 +208,92 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Reducer 17 + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 18 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 19 + expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) + Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10367842752596232 Data size: 1839676035841599918 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > (0.9 * _col1)) (type: boolean) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > (0.9 * _col2)) (type: boolean) + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 0 (type: int), _col3 (type: decimal(37,22)) + key expressions: 0 (type: int), _col1 (type: decimal(37,22)) sort order: +- Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 20 + value expressions: _col0 (type: int) + Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col2: int, _col3: decimal(37,22) + output shape: _col0: int, _col1: decimal(37,22) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 DESC NULLS LAST + order by: _col1 DESC NULLS LAST partition by: 0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((rank_window_0 < 11) and _col2 is not null) (type: boolean) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + predicate: ((rank_window_0 < 11) and _col0 is not null) (type: boolean) + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), rank_window_0 (type: int) + expressions: _col0 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Reducer 22 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -387,23 +310,22 @@ STAGE PLANS: sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(37,22)) - Reducer 24 - Execution mode: vectorized + Reducer 2 Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -413,15 +335,15 @@ STAGE PLANS: 0 _col3 (type: int) 1 _col3 (type: int) outputColumnNames: _col1, _col3, _col5 - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: string) Reducer 4 @@ -430,79 +352,94 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 6 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10367842752596232 Data size: 1839676035841599918 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > (0.9 * _col1)) (type: boolean) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > (0.9 * _col2)) (type: boolean) + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 0 (type: int), _col3 (type: decimal(37,22)) + key expressions: 0 (type: int), _col1 (type: decimal(37,22)) sort order: ++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: int) - Reducer 9 + value expressions: _col0 (type: int) + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col2: int, _col3: decimal(37,22) + output shape: _col0: int, _col1: decimal(37,22) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST + order by: _col1 ASC NULLS FIRST partition by: 0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((rank_window_0 < 11) and _col2 is not null) (type: boolean) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + predicate: ((rank_window_0 < 11) and _col0 is not null) (type: boolean) + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), rank_window_0 (type: int) + expressions: _col0 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index 3d2c4da2ef..241d6d8e1f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join JOIN[111][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product -Warning: Shuffle Join JOIN[107][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 14' is a cross product -Warning: Shuffle Join JOIN[114][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[84][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[115][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[145][bigTable=?] in task 'Stage-1:MAPRED' is a cross product Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with my_customers as ( @@ -115,18 +115,19 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 29 <- Map 28 (GROUP, 2) - Reducer 30 <- Reducer 29 (GROUP, 1) + Reducer 28 <- Map 27 (GROUP, 2) + Reducer 29 <- Reducer 28 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 28 + Map 27 Map Operator Tree: TableScan alias: date_dim @@ -136,7 +137,7 @@ STAGE PLANS: predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (d_month_seq + 1) (type: int) + expressions: (d_month_seq + 3) (type: int) outputColumnNames: _col0 Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -150,7 +151,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 29 + Reducer 28 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -169,7 +170,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 30 + Reducer 29 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -191,9 +192,79 @@ STAGE PLANS: Stage: Stage-3 Spark + Edges: + Reducer 23 <- Map 22 (GROUP, 2) + Reducer 24 <- Reducer 23 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 18 + Map 22 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (d_month_seq + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 23 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 24 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 12 Map Operator Tree: TableScan alias: store @@ -217,50 +288,23 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 9 (GROUP, 1) - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 15 (PARTITION-LEVEL SORT, 398) - Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 772), Reducer 17 (PARTITION-LEVEL SORT, 772) - Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 32 (PARTITION-LEVEL SORT, 1) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 654), Reducer 23 (PARTITION-LEVEL SORT, 654) - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 458), Map 24 (PARTITION-LEVEL SORT, 458), Map 25 (PARTITION-LEVEL SORT, 458) - Reducer 21 <- Map 26 (PARTITION-LEVEL SORT, 505), Reducer 20 (PARTITION-LEVEL SORT, 505) - Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009) - Reducer 23 <- Reducer 22 (GROUP, 610) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 32 <- Map 31 (GROUP, 2) - Reducer 4 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 1009) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 654), Reducer 17 (PARTITION-LEVEL SORT, 654) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 458), Map 18 (PARTITION-LEVEL SORT, 458), Map 19 (PARTITION-LEVEL SORT, 458) + Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 505), Reducer 14 (PARTITION-LEVEL SORT, 505) + Reducer 16 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009) + Reducer 17 <- Reducer 16 (GROUP, 610) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 26 <- Map 25 (GROUP, 2) + Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 772), Reducer 2 (PARTITION-LEVEL SORT, 772) + Reducer 31 <- Map 30 (GROUP, 2) + Reducer 4 <- Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 31 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Reducer 5 (GROUP, 1009) - Reducer 7 <- Reducer 6 (SORT, 1) - Reducer 9 <- Map 1 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 1009) + Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (d_month_seq + 3) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 11 Map Operator Tree: TableScan alias: store_sales @@ -280,27 +324,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_month_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Map 16 + Map 10 Map Operator Tree: TableScan alias: customer_address @@ -321,7 +345,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 18 + 1 Map 12 Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -331,7 +355,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 19 + Map 13 Map Operator Tree: TableScan alias: catalog_sales @@ -351,7 +375,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 24 + Map 18 Map Operator Tree: TableScan alias: web_sales @@ -371,7 +395,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 25 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -390,7 +414,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 26 + Map 20 Map Operator Tree: TableScan alias: item @@ -409,7 +433,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 27 + Map 21 Map Operator Tree: TableScan alias: customer @@ -429,7 +453,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 31 + Map 25 Map Operator Tree: TableScan alias: date_dim @@ -453,83 +477,51 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 10 + Map 30 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (d_month_seq + 3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 12 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) - Reducer 13 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col5 (type: int) - outputColumnNames: _col2, _col4, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col2, _col4, _col10 - input vertices: - 1 Reducer 30 - Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col2, _col4, _col10, _col13 - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int) - outputColumnNames: _col0, _col4, _col11, _col13 - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int) - Reducer 17 + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_month_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -544,19 +536,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 20 + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -572,7 +552,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Reducer 21 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -587,7 +567,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE - Reducer 22 + Reducer 16 Reduce Operator Tree: Join Operator condition map: @@ -607,7 +587,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE - Reducer 23 + Reducer 17 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -625,24 +605,61 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 3 + Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {true} + Inner Join 0 to 1 keys: - 0 - 1 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) + Reducer 26 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 32 + Reducer 3 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col2, _col4, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col2, _col4, _col10 + input vertices: + 1 Reducer 24 + Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) + Reducer 31 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -655,39 +672,69 @@ STAGE PLANS: Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join 0 to 1 keys: 0 1 - outputColumnNames: _col0, _col2, _col6, _col13, _col15 - Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col4, _col10, _col13 + Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col4, _col11, _col13, _col15 - Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col11 BETWEEN _col13 AND _col15 (type: boolean) - Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE + expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col4, _col11, _col13 + Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col4, _col11, _col13 + input vertices: + 1 Reducer 29 + Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col4 (type: decimal(7,2)) - outputColumnNames: _col0, _col4 + expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: decimal(7,2)), _col2 (type: int), _col3 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 BETWEEN _col3 AND _col4 (type: boolean) + Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col4) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 5 + value expressions: _col1 (type: decimal(17,2)) + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -712,7 +759,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -731,7 +778,7 @@ STAGE PLANS: Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: int) - Reducer 7 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -748,25 +795,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/query44.q.out index 0dbce704e6..8105de9ce5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query44.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[141][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -70,146 +70,120 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 10 <- Reducer 8 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_188] - Limit [LIM_187] (rows=100 width=185) + File Output Operator [FS_138] + Limit [LIM_137] (rows=100 width=177) Number of rows:100 - Select Operator [SEL_186] (rows=1393898919384048 width=185) + Select Operator [SEL_136] (rows=1393898919384048 width=177) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_101] - Select Operator [SEL_100] (rows=1393898919384048 width=185) + SHUFFLE [RS_69] + Select Operator [SEL_68] (rows=1393898919384048 width=177) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_145] (rows=1393898919384048 width=185) - Conds:RS_97._col3=RS_98._col3(Inner),Output:["_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_107] (rows=1393898919384048 width=177) + Conds:RS_65._col3=RS_66._col3(Inner),Output:["_col1","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_97] + SHUFFLE [RS_65] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_142] (rows=1267180808338276 width=185) - Conds:RS_148._col0=RS_180._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_104] (rows=1267180808338276 width=177) + Conds:RS_110._col0=RS_130._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_147] (rows=462000 width=1436) + Select Operator [SEL_109] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=462000 width=1436) + Filter Operator [FIL_108] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=1436) default@item,i1,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_product_name"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_180] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_179] (rows=1151982528066248 width=185) + Select Operator [SEL_129] (rows=1151982528066248 width=177) Output:["_col0","_col1"] - Filter Operator [FIL_178] (rows=1151982528066248 width=185) - predicate:((rank_window_0 < 11) and _col2 is not null) - PTF Operator [PTF_177] (rows=3455947584198744 width=185) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST","partition by:":"0"}] - Select Operator [SEL_176] (rows=3455947584198744 width=185) - Output:["_col2","_col3"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_38] + Filter Operator [FIL_128] (rows=1151982528066248 width=177) + predicate:((rank_window_0 < 11) and _col0 is not null) + PTF Operator [PTF_127] (rows=3455947584198744 width=177) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"0"}] + Select Operator [SEL_126] (rows=3455947584198744 width=177) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_22] PartitionCols:0 - Filter Operator [FIL_37] (rows=3455947584198744 width=185) - predicate:(_col3 > (0.9 * _col1)) - Merge Join Operator [MERGEJOIN_141] (rows=10367842752596232 width=185) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_168] - Select Operator [SEL_167] (rows=71999454 width=88) + Filter Operator [FIL_21] (rows=3455947584198744 width=177) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_103] (rows=10367842752596232 width=177) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_125] + Select Operator [SEL_124] (rows=71999454 width=88) Output:["_col0"] - Group By Operator [GBY_166] (rows=71999454 width=88) + Group By Operator [GBY_123] (rows=71999454 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] PartitionCols:_col0 - Group By Operator [GBY_164] (rows=143998908 width=88) + Group By Operator [GBY_121] (rows=143998908 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:410 - Select Operator [SEL_163] (rows=143998908 width=88) + Select Operator [SEL_120] (rows=143998908 width=88) Output:["_col1"] - Filter Operator [FIL_162] (rows=143998908 width=88) + Filter Operator [FIL_119] (rows=143998908 width=88) predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_18] (rows=575995635 width=88) + TableScan [TS_10] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_175] - Select Operator [SEL_174] (rows=143998908 width=88) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_118] + Select Operator [SEL_117] (rows=143998908 width=88) Output:["_col0","_col1"] - Group By Operator [GBY_173] (rows=143998908 width=88) + Group By Operator [GBY_116] (rows=143998908 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_171] (rows=287997817 width=88) + Group By Operator [GBY_114] (rows=287997817 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_170] (rows=287997817 width=88) + Select Operator [SEL_113] (rows=287997817 width=88) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_169] (rows=287997817 width=88) + Filter Operator [FIL_112] (rows=287997817 width=88) predicate:(ss_store_sk = 410) - TableScan [TS_26] (rows=575995635 width=88) + TableScan [TS_3] (rows=575995635 width=88) default@store_sales,ss1,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_161] - Select Operator [SEL_160] (rows=1 width=8) - Filter Operator [FIL_159] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_158] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_157] - Group By Operator [GBY_156] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_155] (rows=71999454 width=88) - Group By Operator [GBY_154] (rows=71999454 width=88) - Output:["_col0"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] - PartitionCols:_col0 - Group By Operator [GBY_152] (rows=143998908 width=88) - Output:["_col0"],keys:410 - Select Operator [SEL_151] (rows=143998908 width=88) - Filter Operator [FIL_150] (rows=143998908 width=88) - predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_hdemo_sk","ss_store_sk"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_98] + SHUFFLE [RS_66] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_144] (rows=1267180808338276 width=185) - Conds:RS_149._col0=RS_185._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_106] (rows=1267180808338276 width=177) + Conds:RS_111._col0=RS_135._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_111] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_147] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] + Please refer to the previous Select Operator [SEL_109] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] PartitionCols:_col0 - Select Operator [SEL_184] (rows=1151982528066248 width=185) + Select Operator [SEL_134] (rows=1151982528066248 width=177) Output:["_col0","_col1"] - Filter Operator [FIL_183] (rows=1151982528066248 width=185) - predicate:((rank_window_0 < 11) and _col2 is not null) - PTF Operator [PTF_182] (rows=3455947584198744 width=185) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_181] (rows=3455947584198744 width=185) - Output:["_col2","_col3"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_83] + Filter Operator [FIL_133] (rows=1151982528066248 width=177) + predicate:((rank_window_0 < 11) and _col0 is not null) + PTF Operator [PTF_132] (rows=3455947584198744 width=177) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_131] (rows=3455947584198744 width=177) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_51] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_37] + Please refer to the previous Filter Operator [FIL_21] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index 3e3c607e0d..c889ce153f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -115,31 +115,31 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Map 16 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Union 17 (CONTAINS) -Map 22 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 17 (CONTAINS) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) -Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Map 27 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Map 29 (SIMPLE_EDGE) -Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 35 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 29 (SIMPLE_EDGE) -Reducer 35 <- Reducer 34 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Map 17 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 23 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Union 18 (CONTAINS) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 19 <- Map 24 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE) +Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 30 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Map 30 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -147,19 +147,19 @@ Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 9 vectorized + Reducer 10 vectorized File Output Operator [FS_360] Limit [LIM_359] (rows=100 width=158) Number of rows:100 Select Operator [SEL_358] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] vectorized + <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_357] Select Operator [SEL_356] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] Group By Operator [GBY_355] (rows=1614130953450400 width=158) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] vectorized + <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_354] PartitionCols:_col0 Group By Operator [GBY_353] (rows=3228261906900801 width=158) @@ -168,268 +168,268 @@ Stage-0 Output:["_col0"] Group By Operator [GBY_351] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_119] PartitionCols:_col0 Group By Operator [GBY_118] (rows=6456523813801603 width=158) - Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col0 + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 Select Operator [SEL_117] (rows=6456523813801603 width=158) - Output:["_col0","_col4"] + Output:["_col0","_col1"] Filter Operator [FIL_116] (rows=6456523813801603 width=158) - predicate:_col11 BETWEEN _col13 AND _col15 - Select Operator [SEL_115] (rows=58108714324214428 width=158) - Output:["_col0","_col4","_col11","_col13","_col15"] - Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158) - Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Merge Join Operator [MERGEJOIN_270] (rows=9131 width=1128) - Conds:(Right Outer),Output:["_col0"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_342] - Group By Operator [GBY_341] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] - PartitionCols:_col0 - Group By Operator [GBY_327] (rows=18262 width=1119) - Output:["_col0"],keys:_col0 - Select Operator [SEL_324] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_322] (rows=18262 width=1119) - predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_73] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_350] - Select Operator [SEL_349] (rows=1 width=8) - Filter Operator [FIL_348] (rows=1 width=8) + predicate:_col2 BETWEEN _col3 AND _col4 + Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_350] + Group By Operator [GBY_349] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] + PartitionCols:_col0 + Group By Operator [GBY_328] (rows=18262 width=1119) + Output:["_col0"],keys:_col0 + Select Operator [SEL_325] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_322] (rows=18262 width=1119) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_50] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Select Operator [SEL_104] (rows=6363893803988 width=1226) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1226) + Conds:(Inner),Output:["_col0","_col4","_col11","_col13"] + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Select Operator [SEL_347] (rows=1 width=8) + Filter Operator [FIL_346] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_347] (rows=1 width=8) + Group By Operator [GBY_345] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_346] - Group By Operator [GBY_345] (rows=1 width=8) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Group By Operator [GBY_343] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_344] (rows=9131 width=1119) - Group By Operator [GBY_343] (rows=9131 width=1119) + Select Operator [SEL_342] (rows=9131 width=1119) + Group By Operator [GBY_341] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] PartitionCols:_col0 - Group By Operator [GBY_328] (rows=18262 width=1119) + Group By Operator [GBY_327] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_325] (rows=18262 width=1119) + Select Operator [SEL_324] (rows=18262 width=1119) Output:["_col0"] Please refer to the previous Filter Operator [FIL_322] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_113] - Select Operator [SEL_108] (rows=6363893803988 width=1217) - Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1217) - Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - Group By Operator [GBY_332] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] - PartitionCols:_col0 - Group By Operator [GBY_326] (rows=18262 width=1119) - Output:["_col0"],keys:_col0 - Select Operator [SEL_323] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Filter Operator [FIL_322] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_105] - Merge Join Operator [MERGEJOIN_271] (rows=696954748 width=97) - Conds:(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_102] - Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88) - Conds:RS_99._col1=RS_100._col5(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135) - Conds:RS_69._col0=RS_321._col1(Inner),Output:["_col5"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014) - Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] - PartitionCols:_col1, _col2 - Select Operator [SEL_296] (rows=40000000 width=1014) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_295] (rows=40000000 width=1014) - predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) - TableScan [TS_29] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] - PartitionCols:_col0, _col1 - Select Operator [SEL_299] (rows=1704 width=1910) + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_101] + Select Operator [SEL_85] (rows=6363893803988 width=1217) + Output:["_col0","_col4","_col11","_col13"] + Merge Join Operator [MERGEJOIN_271] (rows=6363893803988 width=1217) + Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_334] + Group By Operator [GBY_332] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] + PartitionCols:_col0 + Group By Operator [GBY_326] (rows=18262 width=1119) + Output:["_col0"],keys:_col0 + Select Operator [SEL_323] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_322] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_82] + Merge Join Operator [MERGEJOIN_270] (rows=696954748 width=97) + Conds:(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_79] + Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88) + Conds:RS_76._col1=RS_77._col5(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135) + Conds:RS_46._col0=RS_321._col1(Inner),Output:["_col5"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014) + Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] + PartitionCols:_col1, _col2 + Select Operator [SEL_296] (rows=40000000 width=1014) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_295] (rows=40000000 width=1014) + predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_300] + PartitionCols:_col0, _col1 + Select Operator [SEL_299] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_298] (rows=1704 width=1910) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_9] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] + <-Reducer 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] + PartitionCols:_col1 + Select Operator [SEL_320] (rows=287491029 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_298] (rows=1704 width=1910) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_32] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] - <-Reducer 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] - PartitionCols:_col1 - Select Operator [SEL_320] (rows=287491029 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_319] (rows=287491029 width=135) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_63] - PartitionCols:_col0, _col1 - Group By Operator [GBY_62] (rows=574982058 width=135) - Output:["_col0","_col1"],keys:_col10, _col9 - Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135) - Conds:RS_58._col1=RS_315._col0(Inner),Output:["_col9","_col10"] - <-Map 27 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_315] - PartitionCols:_col0 - Select Operator [SEL_314] (rows=80000000 width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_313] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_49] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135) - Conds:RS_55._col2=RS_309._col0(Inner),Output:["_col1"] - <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_309] + Group By Operator [GBY_319] (rows=287491029 width=135) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1 + Group By Operator [GBY_39] (rows=574982058 width=135) + Output:["_col0","_col1"],keys:_col10, _col9 + Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135) + Conds:RS_35._col1=RS_315._col0(Inner),Output:["_col9","_col10"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_315] PartitionCols:_col0 - Select Operator [SEL_308] (rows=115500 width=1436) - Output:["_col0"] - Filter Operator [FIL_307] (rows=115500 width=1436) - predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) - TableScan [TS_46] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135) - Conds:Union 17._col0=RS_303._col0(Inner),Output:["_col1","_col2"] - <-Map 23 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] + Select Operator [SEL_314] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_313] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_26] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135) + Conds:RS_32._col2=RS_309._col0(Inner),Output:["_col1"] + <-Map 26 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_309] PartitionCols:_col0 - Select Operator [SEL_302] (rows=18262 width=1119) + Select Operator [SEL_308] (rows=115500 width=1436) Output:["_col0"] - Filter Operator [FIL_301] (rows=18262 width=1119) - predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_43] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_371] - PartitionCols:_col0 - Select Operator [SEL_370] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_369] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_59_customer_c_customer_sk_min) AND DynamicValue(RS_59_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_59_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_56_item_i_item_sk_min) AND DynamicValue(RS_56_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_56_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_274] (rows=287989836 width=135) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_362] - Group By Operator [GBY_361] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_304] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_302] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_365] - Group By Operator [GBY_364] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_312] - Group By Operator [GBY_311] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_310] (rows=115500 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_308] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_368] - Group By Operator [GBY_367] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_318] - Group By Operator [GBY_317] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_316] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_314] - <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_374] - PartitionCols:_col0 - Select Operator [SEL_373] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_372] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_56_item_i_item_sk_min) AND DynamicValue(RS_56_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_56_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_279] (rows=144002668 width=135) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_363] - Please refer to the previous Group By Operator [GBY_361] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Please refer to the previous Group By Operator [GBY_364] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_99] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88) - Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] - PartitionCols:_col0 - Select Operator [SEL_285] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_284] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_26] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] - PartitionCols:_col0 - Select Operator [SEL_293] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_292] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_291] - Group By Operator [GBY_290] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] - Group By Operator [GBY_288] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_287] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_285] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Select Operator [SEL_339] (rows=1 width=8) - Filter Operator [FIL_338] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_337] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - Group By Operator [GBY_335] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_333] (rows=9131 width=1119) - Please refer to the previous Group By Operator [GBY_332] + Filter Operator [FIL_307] (rows=115500 width=1436) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) + TableScan [TS_23] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135) + Conds:Union 18._col0=RS_303._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_303] + PartitionCols:_col0 + Select Operator [SEL_302] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_301] (rows=18262 width=1119) + predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_20] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Union 18 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] vectorized + Reduce Output Operator [RS_371] + PartitionCols:_col0 + Select Operator [SEL_370] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_369] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_36_customer_c_customer_sk_min) AND DynamicValue(RS_36_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_36_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_274] (rows=287989836 width=135) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_362] + Group By Operator [GBY_361] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_304] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_302] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_365] + Group By Operator [GBY_364] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_312] + Group By Operator [GBY_311] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_310] (rows=115500 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_308] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_318] + Group By Operator [GBY_317] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_316] (rows=80000000 width=860) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_314] + <-Map 23 [CONTAINS] vectorized + Reduce Output Operator [RS_374] + PartitionCols:_col0 + Select Operator [SEL_373] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_372] (rows=144002668 width=135) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_279] (rows=144002668 width=135) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_363] + Please refer to the previous Group By Operator [GBY_361] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_366] + Please refer to the previous Group By Operator [GBY_364] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88) + Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] + PartitionCols:_col0 + Select Operator [SEL_285] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_284] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] + PartitionCols:_col0 + Select Operator [SEL_293] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_292] (rows=575995635 width=88) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_291] + Group By Operator [GBY_290] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_287] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_285] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_340] + Select Operator [SEL_339] (rows=1 width=8) + Filter Operator [FIL_338] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_337] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_336] + Group By Operator [GBY_335] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_333] (rows=9131 width=1119) + Please refer to the previous Group By Operator [GBY_332] diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index 76b3e504bb..506433681d 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -539,20 +539,24 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 Statistics: Num rows: 26 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col10 is null (type: boolean) - Statistics: Num rows: 13 Data size: 1768 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col9 is null (type: boolean) Statistics: Num rows: 13 Data size: 1768 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1768 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1768 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized Reduce Operator Tree: @@ -2374,7 +2378,7 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) @@ -2387,13 +2391,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) Reducer 8 <- Map 7 (GROUP, 2) - Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2410,30 +2412,6 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: s1 - filterExpr: (key = '90') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: '90' (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -2465,33 +2443,18 @@ STAGE PLANS: Select Operator Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator + aggregations: count() keys: '90' (type: string) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized - Reducer 11 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Join Operator @@ -2539,23 +2502,21 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 31250 Data size: 6726500 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31250 Data size: 6476500 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col3) (type: boolean) - Statistics: Num rows: 10416 Data size: 2242023 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > _col2) (type: boolean) + Statistics: Num rows: 10416 Data size: 2158695 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10416 Data size: 2242023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10416 Data size: 2158695 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10416 Data size: 2242023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10416 Data size: 2158695 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2576,37 +2537,19 @@ STAGE PLANS: Execution mode: vectorized Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 9 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2615,7 +2558,7 @@ STAGE PLANS: ListSink Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 4' is a cross product PREHOOK: query: select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src