diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index fc9178f156..995ff9b68c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -222,7 +222,7 @@ public RelOptHiveTable copy(RelDataType newRowType) { // Given a key this method returns true if all of the columns in the key are not nullable public boolean isNonNullableKey(ImmutableBitSet columns) { for (ImmutableBitSet key : nonNullablekeys) { - if (key.contains(columns)) { + if (columns.contains(key)) { return true; } } diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q index 3646e198cb..4f7644275f 100644 --- a/ql/src/test/queries/clientpositive/constraints_optimization.q +++ b/ql/src/test/queries/clientpositive/constraints_optimization.q @@ -428,3 +428,10 @@ GROUP BY , C_LOGIN ; +create table web_sales(ws_order_number int, ws_item_sk int, ws_price float, + constraint pk1 primary key(ws_order_number, ws_item_sk) disable rely); +insert into web_sales values(1, 1, 1.2); +insert into web_sales values(1, 1, 1.2); + explain cbo select count(distinct ws_order_number) from web_sales; + select count(distinct ws_order_number) from web_sales; + drop table web_sales; diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index bf60646636..f7ed9f58a8 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -2742,3 +2742,66 @@ HiveAggregate(group=[{1}]) HiveFilter(condition=[IS NOT NULL($3)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) +PREHOOK: query: create table web_sales(ws_order_number int, ws_item_sk int, ws_price float, + constraint pk1 primary key(ws_order_number, ws_item_sk) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_sales +POSTHOOK: query: create table web_sales(ws_order_number int, ws_item_sk int, ws_price float, + constraint pk1 primary key(ws_order_number, ws_item_sk) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_sales +PREHOOK: query: insert into web_sales values(1, 1, 1.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@web_sales +POSTHOOK: query: insert into web_sales values(1, 1, 1.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@web_sales +POSTHOOK: Lineage: web_sales.ws_item_sk SCRIPT [] +POSTHOOK: Lineage: web_sales.ws_order_number SCRIPT [] +POSTHOOK: Lineage: web_sales.ws_price SCRIPT [] +PREHOOK: query: insert into web_sales values(1, 1, 1.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@web_sales +POSTHOOK: query: insert into web_sales values(1, 1, 1.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@web_sales +POSTHOOK: Lineage: web_sales.ws_item_sk SCRIPT [] +POSTHOOK: Lineage: web_sales.ws_order_number SCRIPT [] +POSTHOOK: Lineage: web_sales.ws_price SCRIPT [] +PREHOOK: query: explain cbo select count(distinct ws_order_number) from web_sales +PREHOOK: type: QUERY +PREHOOK: Input: default@web_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(distinct ws_order_number) from web_sales +POSTHOOK: type: QUERY +POSTHOOK: Input: default@web_sales +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveProject(ws_order_number=[$0]) + HiveAggregate(group=[{0}]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + +PREHOOK: query: select count(distinct ws_order_number) from web_sales +PREHOOK: type: QUERY +PREHOOK: Input: default@web_sales +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct ws_order_number) from web_sales +POSTHOOK: type: QUERY +POSTHOOK: Input: default@web_sales +#### A masked pattern was here #### +1 +PREHOOK: query: drop table web_sales +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@web_sales +PREHOOK: Output: default@web_sales +POSTHOOK: query: drop table web_sales +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: default@web_sales diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out index a7a1a0b082..735a4db2ee 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out @@ -641,13 +641,11 @@ PREHOOK: query: explain select empid, deptno from emps_n3 group by empid, deptno PREHOOK: type: QUERY PREHOOK: Input: default@emps_n3 -PREHOOK: Input: default@mv1_n2 #### A masked pattern was here #### POSTHOOK: query: explain select empid, deptno from emps_n3 group by empid, deptno POSTHOOK: type: QUERY POSTHOOK: Input: default@emps_n3 -POSTHOOK: Input: default@mv1_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage @@ -658,7 +656,7 @@ STAGE PLANS: limit: -1 Processor Tree: TableScan - alias: default.mv1_n2 + alias: emps_n3 Select Operator expressions: empid (type: int), deptno (type: int) outputColumnNames: _col0, _col1 @@ -667,12 +665,10 @@ STAGE PLANS: PREHOOK: query: select empid, deptno from emps_n3 group by empid, deptno PREHOOK: type: QUERY PREHOOK: Input: default@emps_n3 -PREHOOK: Input: default@mv1_n2 #### A masked pattern was here #### POSTHOOK: query: select empid, deptno from emps_n3 group by empid, deptno POSTHOOK: type: QUERY POSTHOOK: Input: default@emps_n3 -POSTHOOK: Input: default@mv1_n2 #### A masked pattern was here #### 100 10 110 10 @@ -713,13 +709,11 @@ PREHOOK: query: explain select empid, name from emps_n3 group by empid, name PREHOOK: type: QUERY PREHOOK: Input: default@emps_n3 -PREHOOK: Input: default@mv1_n2 #### A masked pattern was here #### POSTHOOK: query: explain select empid, name from emps_n3 group by empid, name POSTHOOK: type: QUERY POSTHOOK: Input: default@emps_n3 -POSTHOOK: Input: default@mv1_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage @@ -730,7 +724,7 @@ STAGE PLANS: limit: -1 Processor Tree: TableScan - alias: default.mv1_n2 + alias: emps_n3 Select Operator expressions: empid (type: int), name (type: varchar(256)) outputColumnNames: _col0, _col1 @@ -739,12 +733,10 @@ STAGE PLANS: PREHOOK: query: select empid, name from emps_n3 group by empid, name PREHOOK: type: QUERY PREHOOK: Input: default@emps_n3 -PREHOOK: Input: default@mv1_n2 #### A masked pattern was here #### POSTHOOK: query: select empid, name from emps_n3 group by empid, name POSTHOOK: type: QUERY POSTHOOK: Input: default@emps_n3 -POSTHOOK: Input: default@mv1_n2 #### A masked pattern was here #### 100 Bill 110 Theodore diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out index ddbf80a66d..251fd6861d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out @@ -97,6 +97,7 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveProject(cs_warehouse_sk=[$14], cs_order_number=[$17]) HiveFilter(condition=[IS NOT NULL($14)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs2]) - HiveProject(cr_order_number0=[$16], $f1=[true]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr1]) + HiveProject(cr_order_number0=[$0], $f1=[true]) + HiveAggregate(group=[{16}]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr1]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out index 1d2e0df126..e5b0d19715 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[437][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product -Warning: Shuffle Join MERGEJOIN[438][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product -Warning: Shuffle Join MERGEJOIN[440][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[441][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[443][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product +Warning: Shuffle Join MERGEJOIN[444][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[447][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 24' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -122,10 +122,10 @@ CBO PLAN: HiveSortLimit(fetch=[100]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($0)]) - HiveProject(sales=[$0]) + HiveProject($f0=[$0]) HiveUnion(all=[true]) - HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) - HiveJoin(condition=[=($3, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[*(CAST($4):DECIMAL(10, 0), $5)]) + HiveSemiJoin(condition=[=($3, $8)], joinType=[inner]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0]) HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -169,34 +169,22 @@ HiveSortLimit(fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(item_sk=[$0]) - HiveFilter(condition=[>($1, 4)]) - HiveAggregate(group=[{4}], agg#0=[count()]) - HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(sales=[*(CAST($5):DECIMAL(10, 0), $6)]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(item_sk=[$0]) - HiveFilter(condition=[>($1, 4)]) - HiveAggregate(group=[{4}], agg#0=[count()]) - HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(substr=[$0], i_item_sk=[$1], d_date=[$2], $f3=[$3]) + HiveFilter(condition=[>($3, 4)]) + HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3]) + HiveAggregate(group=[{3, 4, 5}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[*(CAST($4):DECIMAL(10, 0), $5)]) + HiveSemiJoin(condition=[=($2, $8)], joinType=[inner]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0]) HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -240,4 +228,18 @@ HiveSortLimit(fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(substr=[$0], i_item_sk=[$1], d_date=[$2], $f3=[$3]) + HiveFilter(condition=[>($3, 4)]) + HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3]) + HiveAggregate(group=[{3, 4, 5}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query39.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query39.q.out index 272bedcbce..88ffbbece5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query39.q.out @@ -63,31 +63,31 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[$2], cov=[$3], w_warehouse_sk1=[$4], i_item_sk1=[$5], d_moy1=[CAST(5):INTEGER], mean1=[$6], cov1=[$7]) HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$6], sort5=[$7], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC]) - HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[$6], cov=[$7], w_warehouse_sk0=[$0], i_item_sk0=[$1], mean0=[$2], cov0=[$3]) - HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[/(CAST($5):DOUBLE, $4)], cov=[CASE(=(/(CAST($5):DOUBLE, $4), 0), null, /(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)))]) - HiveFilter(condition=[CASE(=(/(CAST($5):DOUBLE, $4), 0), false, >(/(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)), 1))]) - HiveAggregate(group=[{1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) + HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[$2], cov=[$3], w_warehouse_sk0=[$4], i_item_sk0=[$5], mean0=[$6], cov0=[$7]) + HiveJoin(condition=[AND(=($1, $5), =($0, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, /(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)))]) + HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, >(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)), 1))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) HiveProject($f0=[$6], $f1=[$5], $f2=[$3], $f4=[$2], $f40=[CAST($2):DOUBLE], $f6=[*(CAST($2):DOUBLE, CAST($2):DOUBLE)]) HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], inv_item_sk=[$1]) HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 5))]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[/(CAST($5):DOUBLE, $4)], cov=[CASE(=(/(CAST($5):DOUBLE, $4), 0), null, /(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)))]) - HiveFilter(condition=[CASE(=(/(CAST($5):DOUBLE, $4), 0), false, >(/(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)), 1))]) - HiveAggregate(group=[{1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) + HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, /(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)))]) + HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, >(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)), 1))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) HiveProject($f0=[$6], $f1=[$5], $f2=[$3], $f4=[$2], $f40=[CAST($2):DOUBLE], $f6=[*(CAST($2):DOUBLE, CAST($2):DOUBLE)]) HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], inv_item_sk=[$1]) HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 4))]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 5))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out index 198778f8e7..19c531273b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out @@ -93,6 +93,7 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) HiveFilter(condition=[IS NOT NULL($15)]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) - HiveProject(wr_order_number0=[$13], $f1=[true]) - HiveTableScan(table=[[default, web_returns]], table:alias=[wr1]) + HiveProject(wr_order_number0=[$0], $f1=[true]) + HiveAggregate(group=[{13}]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr1]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out index 0546f1ce5e..526dafb1f3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out @@ -78,22 +78,24 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[sum($8)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_order_number=[$14]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_order_number=[$1]) + HiveProject(wr_order_number=[$0]) + HiveAggregate(group=[{14}]) + HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$1]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0]) + HiveAggregate(group=[{1}]) HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_order_number=[$1]) - HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out index 3143be8480..cb2db82997 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out @@ -79,11 +79,12 @@ Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 18 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -93,152 +94,159 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_169] - Limit [LIM_168] (rows=1 width=240) + File Output Operator [FS_176] + Limit [LIM_175] (rows=1 width=240) Number of rows:100 - Select Operator [SEL_167] (rows=1 width=240) + Select Operator [SEL_174] (rows=1 width=240) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] - Select Operator [SEL_165] (rows=1 width=240) + SHUFFLE [RS_173] + Select Operator [SEL_172] (rows=1 width=240) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_164] (rows=1 width=232) + Group By Operator [GBY_171] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_163] - Group By Operator [GBY_162] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_170] + Group By Operator [GBY_169] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_161] (rows=5150256 width=228) + Group By Operator [GBY_168] (rows=5150256 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_69] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=5150256 width=228) + Group By Operator [GBY_72] (rows=5150256 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_37] (rows=5150256 width=218) + Select Operator [SEL_41] (rows=5150256 width=214) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_36] (rows=5150256 width=218) + Filter Operator [FIL_40] (rows=5150256 width=214) predicate:_col14 is null - Merge Join Operator [MERGEJOIN_125] (rows=13282454 width=218) - Conds:RS_33._col4=RS_160._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + Merge Join Operator [MERGEJOIN_129] (rows=10300512 width=214) + Conds:RS_37._col4=RS_167._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_167] PartitionCols:_col0 - Select Operator [SEL_159] (rows=28798881 width=8) + Select Operator [SEL_166] (rows=18238808 width=8) Output:["_col0","_col1"] - TableScan [TS_25] (rows=28798881 width=4) - default@catalog_returns,cr1,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_order_number"] + Group By Operator [GBY_165] (rows=18238808 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] + PartitionCols:_col0 + Group By Operator [GBY_163] (rows=28798881 width=4) + Output:["_col0"],keys:cr_order_number + TableScan [TS_25] (rows=28798881 width=4) + default@catalog_returns,cr1,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_order_number"] <-Reducer 5 [ONE_TO_ONE_EDGE] - FORWARD [RS_33] + FORWARD [RS_37] PartitionCols:_col4 - Select Operator [SEL_32] (rows=5150256 width=200) + Select Operator [SEL_36] (rows=5150256 width=200) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_124] (rows=5150256 width=202) - Conds:RS_29._col4=RS_158._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + Merge Join Operator [MERGEJOIN_128] (rows=5150256 width=202) + Conds:RS_33._col4=RS_162._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_29] + SHUFFLE [RS_33] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_123] (rows=5150256 width=200) - Conds:RS_18._col2=RS_144._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_127] (rows=5150256 width=200) + Conds:RS_18._col2=RS_148._col0(Inner),Output:["_col3","_col4","_col5","_col6"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_143] (rows=10 width=102) + Select Operator [SEL_147] (rows=10 width=102) Output:["_col0"] - Filter Operator [FIL_142] (rows=10 width=102) + Filter Operator [FIL_146] (rows=10 width=102) predicate:(cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') TableScan [TS_9] (rows=60 width=102) default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_122] (rows=30901534 width=230) - Conds:RS_15._col1=RS_136._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_126] (rows=30901534 width=230) + Conds:RS_15._col1=RS_140._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_135] (rows=784314 width=90) + Select Operator [SEL_139] (rows=784314 width=90) Output:["_col0"] - Filter Operator [FIL_134] (rows=784314 width=90) + Filter Operator [FIL_138] (rows=784314 width=90) predicate:(ca_state = 'NY') TableScan [TS_6] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=31519516 width=234) - Conds:RS_152._col0=RS_128._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_125] (rows=31519516 width=234) + Conds:RS_156._col0=RS_132._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + SHUFFLE [RS_132] PartitionCols:_col0 - Select Operator [SEL_127] (rows=8116 width=98) + Select Operator [SEL_131] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_126] (rows=8116 width=98) + Filter Operator [FIL_130] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_156] PartitionCols:_col0 - Select Operator [SEL_151] (rows=283695062 width=243) + Select Operator [SEL_155] (rows=283695062 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_150] (rows=283695062 width=243) + Filter Operator [FIL_154] (rows=283695062 width=243) predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) TableScan [TS_0] (rows=287989836 width=243) default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_133] - Group By Operator [GBY_132] (rows=1 width=12) + BROADCAST [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_129] (rows=8116 width=4) + Select Operator [SEL_133] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_127] + Please refer to the previous Select Operator [SEL_131] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_141] - Group By Operator [GBY_140] (rows=1 width=12) + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] - Group By Operator [GBY_138] (rows=1 width=12) + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_137] (rows=784314 width=4) + Select Operator [SEL_141] (rows=784314 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_135] + Please refer to the previous Select Operator [SEL_139] <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_149] - Group By Operator [GBY_148] (rows=1 width=12) + BROADCAST [RS_153] + Group By Operator [GBY_152] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] - Group By Operator [GBY_146] (rows=1 width=12) + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_145] (rows=10 width=4) + Select Operator [SEL_149] (rows=10 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_143] + Please refer to the previous Select Operator [SEL_147] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_158] + SHUFFLE [RS_162] PartitionCols:_col0 - Group By Operator [GBY_157] (rows=286548719 width=7) + Group By Operator [GBY_161] (rows=286548719 width=7) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_156] (rows=286548719 width=7) + Select Operator [SEL_160] (rows=286548719 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=286548719 width=7) - predicate:((cs_order_number BETWEEN DynamicValue(RS_29_cs1_cs_order_number_min) AND DynamicValue(RS_29_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_29_cs1_cs_order_number_bloom_filter))) and cs_warehouse_sk is not null) + Filter Operator [FIL_159] (rows=286548719 width=7) + predicate:((cs_order_number BETWEEN DynamicValue(RS_33_cs1_cs_order_number_min) AND DynamicValue(RS_33_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_33_cs1_cs_order_number_bloom_filter))) and cs_warehouse_sk is not null) TableScan [TS_22] (rows=287989836 width=7) default@catalog_sales,cs2,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_warehouse_sk","cs_order_number"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_154] - Group By Operator [GBY_153] (rows=1 width=12) + BROADCAST [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) + SHUFFLE [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=5150256 width=8) + Select Operator [SEL_113] (rows=5150256 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_123] + Please refer to the previous Merge Join Operator [MERGEJOIN_127] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out index 292c920583..eeeaee6109 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[437][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product -Warning: Shuffle Join MERGEJOIN[438][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product -Warning: Shuffle Join MERGEJOIN[440][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[441][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[443][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product +Warning: Shuffle Join MERGEJOIN[444][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[447][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 24' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -121,15 +121,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 31 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 34 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Map 15 <- Reducer 27 (BROADCAST_EDGE) -Map 35 <- Reducer 7 (BROADCAST_EDGE) -Map 37 <- Reducer 34 (BROADCAST_EDGE) -Map 39 <- Reducer 14 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE) -Map 40 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Map 39 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Map 28 <- Reducer 7 (BROADCAST_EDGE) +Map 30 <- Reducer 38 (BROADCAST_EDGE) +Map 40 <- Reducer 14 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE) +Map 41 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 40 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 32 (ONE_TO_ONE_EDGE), Union 5 (CONTAINS) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) @@ -137,24 +137,24 @@ Reducer 17 <- Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 21 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 36 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE) Reducer 21 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 22 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 23 <- Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 25 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE), Reducer 41 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE), Reducer 42 (CUSTOM_SIMPLE_EDGE) Reducer 25 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 26 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) -Reducer 29 <- Map 38 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 29 <- Map 28 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Reducer 29 (SIMPLE_EDGE) -Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 35 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 30 (ONE_TO_ONE_EDGE), Union 5 (CONTAINS) -Reducer 41 <- Map 40 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 32 <- Map 39 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Reducer 32 (SIMPLE_EDGE) +Reducer 36 <- Reducer 35 (CUSTOM_SIMPLE_EDGE) +Reducer 38 <- Map 37 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 42 <- Map 41 (SIMPLE_EDGE) Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -164,357 +164,370 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_532] - Limit [LIM_531] (rows=1 width=112) + File Output Operator [FS_542] + Limit [LIM_541] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_530] (rows=1 width=112) + Group By Operator [GBY_540] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_454] - Group By Operator [GBY_453] (rows=1 width=112) + Reduce Output Operator [RS_460] + Group By Operator [GBY_459] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_451] (rows=52 width=112) + Select Operator [SEL_457] (rows=155 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_450] (rows=52 width=2) - Conds:RS_192._col1=RS_538._col0(Inner),Output:["_col3","_col4"] - <-Reducer 32 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_538] + Merge Join Operator [MERGEJOIN_456] (rows=155 width=0) + Conds:RS_198._col1=RS_550._col0(Left Semi),Output:["_col3","_col4"] + <-Reducer 35 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_550] PartitionCols:_col0 - Select Operator [SEL_537] (rows=745 width=4) - Output:["_col0"] - Filter Operator [FIL_536] (rows=745 width=12) - predicate:(_col1 > 4L) - Group By Operator [GBY_535] (rows=2235 width=12) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_182] - PartitionCols:_col0 - Group By Operator [GBY_83] (rows=2235 width=12) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col4 - Merge Join Operator [MERGEJOIN_431] (rows=19646398 width=4) - Conds:RS_79._col1=RS_483._col0(Inner),Output:["_col4"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_483] - PartitionCols:_col0 - Select Operator [SEL_482] (rows=462000 width=188) - Output:["_col0"] - TableScan [TS_74] (rows=462000 width=4) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_79] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_430] (rows=19646398 width=4) - Conds:RS_481._col0=RS_471._col0(Inner),Output:["_col1"] - <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_471] - PartitionCols:_col0 - Select Operator [SEL_468] (rows=2609 width=4) - Output:["_col0"] - Filter Operator [FIL_467] (rows=2609 width=8) - predicate:(d_year) IN (1999, 2000, 2001, 2002) - TableScan [TS_9] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_481] + Group By Operator [GBY_549] (rows=2235 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_548] (rows=6548799 width=4) + Output:["_col0"] + Filter Operator [FIL_547] (rows=6548799 width=290) + predicate:(_col3 > 4L) + Select Operator [SEL_546] (rows=19646398 width=290) + Output:["_col1","_col3"] + Group By Operator [GBY_545] (rows=19646398 width=290) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_185] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_83] (rows=19646398 width=290) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col3, _col5 + Merge Join Operator [MERGEJOIN_437] (rows=19646398 width=282) + Conds:RS_79._col1=RS_485._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_485] PartitionCols:_col0 - Select Operator [SEL_480] (rows=550076554 width=7) + Select Operator [SEL_484] (rows=462000 width=188) Output:["_col0","_col1"] - Filter Operator [FIL_479] (rows=550076554 width=7) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_77_date_dim_d_date_sk_min) AND DynamicValue(RS_77_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_77_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) - TableScan [TS_68] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_478] - Group By Operator [GBY_477] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_476] - Group By Operator [GBY_474] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_472] (rows=2609 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_468] + TableScan [TS_74] (rows=462000 width=188) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_79] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_436] (rows=19646398 width=98) + Conds:RS_483._col0=RS_475._col0(Inner),Output:["_col1","_col3"] + <-Map 37 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_475] + PartitionCols:_col0 + Select Operator [SEL_474] (rows=2609 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_473] (rows=2609 width=102) + predicate:(d_year) IN (1999, 2000, 2001, 2002) + TableScan [TS_71] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_year"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_483] + PartitionCols:_col0 + Select Operator [SEL_482] (rows=550076554 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_481] (rows=550076554 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_77_date_dim_d_date_sk_min) AND DynamicValue(RS_77_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_77_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_68] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] + <-Reducer 38 [BROADCAST_EDGE] vectorized + BROADCAST [RS_480] + Group By Operator [GBY_479] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_478] + Group By Operator [GBY_477] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_476] (rows=2609 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_474] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_192] + SHUFFLE [RS_198] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_442] (rows=3941101 width=118) - Conds:RS_189._col2=RS_190._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_448] (rows=3941101 width=118) + Conds:RS_193._col2=RS_194._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_189] + PARTITION_ONLY_SHUFFLE [RS_193] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_432] (rows=3941102 width=122) - Conds:RS_546._col0=RS_459._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_438] (rows=3941102 width=122) + Conds:RS_558._col0=RS_465._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_459] + PARTITION_ONLY_SHUFFLE [RS_465] PartitionCols:_col0 - Select Operator [SEL_456] (rows=50 width=4) + Select Operator [SEL_462] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_455] (rows=50 width=12) + Filter Operator [FIL_461] (rows=50 width=12) predicate:((d_moy = 1) and (d_year = 1999)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_546] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_558] PartitionCols:_col0 - Select Operator [SEL_545] (rows=143930993 width=127) + Select Operator [SEL_557] (rows=143930993 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_544] (rows=143930993 width=127) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_193_item_i_item_sk_min) AND DynamicValue(RS_193_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_193_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_187_date_dim_d_date_sk_min) AND DynamicValue(RS_187_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_187_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_98] (rows=144002668 width=127) + Filter Operator [FIL_556] (rows=143930993 width=127) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_199_item_i_item_sk_min) AND DynamicValue(RS_199_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_199_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_191_date_dim_d_date_sk_min) AND DynamicValue(RS_191_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_191_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_101] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_534] - Group By Operator [GBY_533] (rows=1 width=12) + BROADCAST [RS_544] + Group By Operator [GBY_543] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_464] - Group By Operator [GBY_462] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_470] + Group By Operator [GBY_468] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_460] (rows=50 width=4) + Select Operator [SEL_466] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_456] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_543] - Group By Operator [GBY_542] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_462] + <-Reducer 36 [BROADCAST_EDGE] vectorized + BROADCAST [RS_555] + Group By Operator [GBY_554] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_541] - Group By Operator [GBY_540] (rows=1 width=12) + <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_553] + Group By Operator [GBY_552] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_539] (rows=745 width=4) + Select Operator [SEL_551] (rows=2235 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_537] + Please refer to the previous Group By Operator [GBY_549] <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_190] + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_165] (rows=471875 width=3) + Select Operator [SEL_168] (rows=471875 width=3) Output:["_col0"] - Filter Operator [FIL_164] (rows=471875 width=227) + Filter Operator [FIL_167] (rows=471875 width=227) predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_441] (rows=1415626 width=227) + Merge Join Operator [MERGEJOIN_447] (rows=1415626 width=227) Conds:(Inner),Output:["_col1","_col2","_col3"] <-Reducer 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_161] - Merge Join Operator [MERGEJOIN_440] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_164] + Merge Join Operator [MERGEJOIN_446] (rows=1 width=112) Conds:(Inner),Output:["_col1"] <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_552] - Select Operator [SEL_551] (rows=1 width=8) - Filter Operator [FIL_550] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_564] + Select Operator [SEL_563] (rows=1 width=8) + Filter Operator [FIL_562] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_549] (rows=1 width=8) + Group By Operator [GBY_561] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_548] (rows=1 width=8) - Group By Operator [GBY_547] (rows=1 width=8) + Select Operator [SEL_560] (rows=1 width=8) + Group By Operator [GBY_559] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_512] - Group By Operator [GBY_508] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_522] + Group By Operator [GBY_518] (rows=1 width=8) Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_504] (rows=50562 width=112) + Select Operator [SEL_514] (rows=50562 width=112) Output:["_col0"] - Group By Operator [GBY_501] (rows=50562 width=112) + Group By Operator [GBY_511] (rows=50562 width=112) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=455058 width=112) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col1 - Merge Join Operator [MERGEJOIN_428] (rows=18762463 width=112) - Conds:RS_500._col0=RS_469._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_434] (rows=18762463 width=112) + Conds:RS_510._col0=RS_502._col0(Inner),Output:["_col1","_col2"] <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_469] + PARTITION_ONLY_SHUFFLE [RS_502] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_468] + Select Operator [SEL_501] (rows=2609 width=4) + Output:["_col0"] + Filter Operator [FIL_500] (rows=2609 width=8) + predicate:(d_year) IN (1999, 2000, 2001, 2002) + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_500] + SHUFFLE [RS_510] PartitionCols:_col0 - Select Operator [SEL_499] (rows=525327388 width=119) + Select Operator [SEL_509] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_498] (rows=525327388 width=118) + Filter Operator [FIL_508] (rows=525327388 width=118) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_6] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_497] - Group By Operator [GBY_496] (rows=1 width=12) + BROADCAST [RS_507] + Group By Operator [GBY_506] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_475] - Group By Operator [GBY_473] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_505] + Group By Operator [GBY_504] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_470] (rows=2609 width=4) + Select Operator [SEL_503] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_468] + Please refer to the previous Select Operator [SEL_501] <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_554] - Group By Operator [GBY_553] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_566] + Group By Operator [GBY_565] (rows=1 width=112) Output:["_col0"],aggregations:["max(VALUE._col0)"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_513] - Group By Operator [GBY_509] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_523] + Group By Operator [GBY_519] (rows=1 width=112) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_505] (rows=50562 width=112) + Select Operator [SEL_515] (rows=50562 width=112) Output:["_col1"] - Please refer to the previous Group By Operator [GBY_501] - <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_562] - Group By Operator [GBY_561] (rows=1415626 width=115) + Please refer to the previous Group By Operator [GBY_511] + <-Reducer 42 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_574] + Group By Operator [GBY_573] (rows=1415626 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_560] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_572] PartitionCols:_col0 - Group By Operator [GBY_559] (rows=550080312 width=115) + Group By Operator [GBY_571] (rows=550080312 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_558] (rows=550080312 width=114) + Select Operator [SEL_570] (rows=550080312 width=114) Output:["_col0","_col1"] - Filter Operator [FIL_557] (rows=550080312 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_189_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_189_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_189_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) - TableScan [TS_150] (rows=575995635 width=114) + Filter Operator [FIL_569] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_193_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_193_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_193_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_153] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_556] - Group By Operator [GBY_555] (rows=1 width=12) + BROADCAST [RS_568] + Group By Operator [GBY_567] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_406] - Group By Operator [GBY_405] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_412] + Group By Operator [GBY_411] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_404] (rows=3941102 width=7) + Select Operator [SEL_410] (rows=3941102 width=7) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_432] + Please refer to the previous Merge Join Operator [MERGEJOIN_438] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_449] - Group By Operator [GBY_448] (rows=1 width=112) + Reduce Output Operator [RS_455] + Group By Operator [GBY_454] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_446] (rows=102 width=112) + Select Operator [SEL_452] (rows=304 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_445] (rows=102 width=1) - Conds:RS_94._col2=RS_487._col0(Inner),Output:["_col3","_col4"] - <-Reducer 30 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_487] + Merge Join Operator [MERGEJOIN_451] (rows=304 width=0) + Conds:RS_97._col2=RS_491._col0(Left Semi),Output:["_col3","_col4"] + <-Reducer 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_491] PartitionCols:_col0 - Select Operator [SEL_486] (rows=745 width=4) - Output:["_col0"] - Filter Operator [FIL_485] (rows=745 width=12) - predicate:(_col1 > 4L) - Group By Operator [GBY_484] (rows=2235 width=12) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_84] - PartitionCols:_col0 - Please refer to the previous Group By Operator [GBY_83] + Group By Operator [GBY_490] (rows=2235 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_489] (rows=6548799 width=4) + Output:["_col0"] + Filter Operator [FIL_488] (rows=6548799 width=290) + predicate:(_col3 > 4L) + Select Operator [SEL_487] (rows=19646398 width=290) + Output:["_col1","_col3"] + Group By Operator [GBY_486] (rows=19646398 width=290) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col0, _col1, _col2 + Please refer to the previous Group By Operator [GBY_83] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_94] + SHUFFLE [RS_97] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_439] (rows=7751875 width=98) - Conds:RS_91._col1=RS_92._col0(Inner),Output:["_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_445] (rows=7751875 width=98) + Conds:RS_92._col1=RS_93._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_91] + PARTITION_ONLY_SHUFFLE [RS_92] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_427] (rows=7751875 width=101) - Conds:RS_495._col0=RS_457._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_433] (rows=7751875 width=101) + Conds:RS_499._col0=RS_463._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_457] + PARTITION_ONLY_SHUFFLE [RS_463] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_456] + Please refer to the previous Select Operator [SEL_462] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_495] + SHUFFLE [RS_499] PartitionCols:_col0 - Select Operator [SEL_494] (rows=285117831 width=127) + Select Operator [SEL_498] (rows=285117831 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_493] (rows=285117831 width=127) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_89_date_dim_d_date_sk_min) AND DynamicValue(RS_89_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_89_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + Filter Operator [FIL_497] (rows=285117831 width=127) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_492] - Group By Operator [GBY_491] (rows=1 width=12) + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_496] + Group By Operator [GBY_495] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - FORWARD [RS_490] - Group By Operator [GBY_489] (rows=1 width=12) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_494] + Group By Operator [GBY_493] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_488] (rows=745 width=4) + Select Operator [SEL_492] (rows=2235 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_486] + Please refer to the previous Group By Operator [GBY_490] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_466] - Group By Operator [GBY_465] (rows=1 width=12) + BROADCAST [RS_472] + Group By Operator [GBY_471] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_463] - Group By Operator [GBY_461] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_469] + Group By Operator [GBY_467] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_458] (rows=50 width=4) + Select Operator [SEL_464] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_456] + Please refer to the previous Select Operator [SEL_462] <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_92] + SHUFFLE [RS_93] PartitionCols:_col0 Select Operator [SEL_67] (rows=471875 width=3) Output:["_col0"] Filter Operator [FIL_66] (rows=471875 width=227) predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_438] (rows=1415626 width=227) + Merge Join Operator [MERGEJOIN_444] (rows=1415626 width=227) Conds:(Inner),Output:["_col1","_col2","_col3"] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_63] - Merge Join Operator [MERGEJOIN_437] (rows=1 width=112) + Merge Join Operator [MERGEJOIN_443] (rows=1 width=112) Conds:(Inner),Output:["_col1"] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_519] - Select Operator [SEL_518] (rows=1 width=8) - Filter Operator [FIL_517] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_529] + Select Operator [SEL_528] (rows=1 width=8) + Filter Operator [FIL_527] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_516] (rows=1 width=8) + Group By Operator [GBY_526] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_515] (rows=1 width=8) - Group By Operator [GBY_514] (rows=1 width=8) + Select Operator [SEL_525] (rows=1 width=8) + Group By Operator [GBY_524] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_510] - Group By Operator [GBY_506] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_520] + Group By Operator [GBY_516] (rows=1 width=8) Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_502] (rows=50562 width=112) + Select Operator [SEL_512] (rows=50562 width=112) Output:["_col0"] - Please refer to the previous Group By Operator [GBY_501] + Please refer to the previous Group By Operator [GBY_511] <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_521] - Group By Operator [GBY_520] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_531] + Group By Operator [GBY_530] (rows=1 width=112) Output:["_col0"],aggregations:["max(VALUE._col0)"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_511] - Group By Operator [GBY_507] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_521] + Group By Operator [GBY_517] (rows=1 width=112) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_503] (rows=50562 width=112) + Select Operator [SEL_513] (rows=50562 width=112) Output:["_col1"] - Please refer to the previous Group By Operator [GBY_501] - <-Reducer 36 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_529] - Group By Operator [GBY_528] (rows=1415626 width=115) + Please refer to the previous Group By Operator [GBY_511] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_539] + Group By Operator [GBY_538] (rows=1415626 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_527] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_537] PartitionCols:_col0 - Group By Operator [GBY_526] (rows=550080312 width=115) + Group By Operator [GBY_536] (rows=550080312 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_525] (rows=550080312 width=114) + Select Operator [SEL_535] (rows=550080312 width=114) Output:["_col0","_col1"] - Filter Operator [FIL_524] (rows=550080312 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_91_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_91_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_91_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + Filter Operator [FIL_534] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_92_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_92_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_92_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) TableScan [TS_52] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_523] - Group By Operator [GBY_522] (rows=1 width=12) + BROADCAST [RS_533] + Group By Operator [GBY_532] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_333] - Group By Operator [GBY_332] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_339] + Group By Operator [GBY_338] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_331] (rows=7751875 width=6) + Select Operator [SEL_337] (rows=7751875 width=6) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_427] + Please refer to the previous Merge Join Operator [MERGEJOIN_433] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out index 812928b32c..ca2d830a11 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out @@ -66,7 +66,7 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (ONE_TO_ONE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) @@ -77,99 +77,101 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_166] - Select Operator [SEL_165] (rows=859 width=56) + File Output Operator [FS_170] + Select Operator [SEL_169] (rows=189509 width=56) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_44] - Select Operator [SEL_43] (rows=859 width=48) - Output:["_col0","_col1","_col2","_col3","_col6","_col7"] - Merge Join Operator [MERGEJOIN_144] (rows=859 width=40) - Conds:RS_160._col0, _col1=RS_164._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col6","_col7"] - <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_160] - PartitionCols:_col0, _col1 - Select Operator [SEL_159] (rows=859 width=24) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_158] (rows=859 width=40) - predicate:CASE WHEN (((UDFToDouble(_col2) / _col3) = 0)) THEN (false) ELSE (((power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col2) / _col3)) > 1.0D)) END - Group By Operator [GBY_157] (rows=1719 width=40) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1 + Merge Join Operator [MERGEJOIN_146] (rows=189509 width=48) + Conds:RS_163._col0, _col1=RS_168._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0, _col1 + Select Operator [SEL_162] (rows=18049 width=24) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_161] (rows=18049 width=40) + predicate:CASE WHEN (((UDFToDouble(_col3) / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (UDFToDouble(_col3) / _col4)) > 1.0D)) END + Select Operator [SEL_160] (rows=36099 width=40) + Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_159] (rows=36099 width=140) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_16] - PartitionCols:_col0, _col1 - Group By Operator [GBY_15] (rows=1719 width=40) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2 - Select Operator [SEL_13] (rows=1032514 width=8) - Output:["_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_141] (rows=1032514 width=8) - Conds:RS_10._col1=RS_155._col0(Inner),Output:["_col2","_col3","_col5"] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_15] (rows=36099 width=140) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 + Select Operator [SEL_13] (rows=1032514 width=108) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_143] (rows=1032514 width=108) + Conds:RS_10._col1=RS_157._col0(Inner),Output:["_col2","_col3","_col5","_col6"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_155] + SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_154] (rows=27 width=4) - Output:["_col0"] - TableScan [TS_5] (rows=27 width=4) - default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk"] + Select Operator [SEL_156] (rows=27 width=104) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_140] (rows=1032514 width=8) - Conds:RS_146._col0=RS_152._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_142] (rows=1032514 width=8) + Conds:RS_148._col0=RS_154._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_145] (rows=37584000 width=15) + Select Operator [SEL_147] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] TableScan [TS_0] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_150] (rows=50 width=4) + Select Operator [SEL_152] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_148] (rows=50 width=12) - predicate:((d_moy = 5) and (d_year = 1999)) + Filter Operator [FIL_150] (rows=50 width=12) + predicate:((d_moy = 4) and (d_year = 1999)) TableScan [TS_2] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_164] - PartitionCols:_col0, _col1 - Select Operator [SEL_163] (rows=859 width=24) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_162] (rows=859 width=40) - predicate:CASE WHEN (((UDFToDouble(_col2) / _col3) = 0)) THEN (false) ELSE (((power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col2) / _col3)) > 1.0D)) END - Group By Operator [GBY_161] (rows=1719 width=40) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1 + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_168] + PartitionCols:_col0, _col1 + Select Operator [SEL_167] (rows=18049 width=24) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_166] (rows=18049 width=40) + predicate:CASE WHEN (((UDFToDouble(_col3) / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (UDFToDouble(_col3) / _col4)) > 1.0D)) END + Select Operator [SEL_165] (rows=36099 width=40) + Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_164] (rows=36099 width=140) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=1719 width=40) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2 - Select Operator [SEL_33] (rows=1032514 width=8) - Output:["_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_143] (rows=1032514 width=8) - Conds:RS_30._col1=RS_156._col0(Inner),Output:["_col2","_col3","_col5"] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_35] (rows=36099 width=140) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 + Select Operator [SEL_33] (rows=1032514 width=108) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_145] (rows=1032514 width=108) + Conds:RS_30._col1=RS_158._col0(Inner),Output:["_col2","_col3","_col5","_col6"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_158] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_154] + Please refer to the previous Select Operator [SEL_156] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_142] (rows=1032514 width=8) - Conds:RS_147._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_144] (rows=1032514 width=8) + Conds:RS_149._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + SHUFFLE [RS_149] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_145] + Please refer to the previous Select Operator [SEL_147] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_155] PartitionCols:_col0 - Select Operator [SEL_151] (rows=50 width=4) + Select Operator [SEL_153] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_149] (rows=50 width=12) - predicate:((d_moy = 4) and (d_year = 1999)) + Filter Operator [FIL_151] (rows=50 width=12) + predicate:((d_moy = 5) and (d_year = 1999)) Please refer to the previous TableScan [TS_2] PREHOOK: query: with inv as diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out index 8c813d84ee..7859063efe 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out @@ -75,11 +75,12 @@ Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 18 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -89,152 +90,159 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_169] - Limit [LIM_168] (rows=1 width=240) + File Output Operator [FS_176] + Limit [LIM_175] (rows=1 width=240) Number of rows:100 - Select Operator [SEL_167] (rows=1 width=240) + Select Operator [SEL_174] (rows=1 width=240) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] - Select Operator [SEL_165] (rows=1 width=240) + SHUFFLE [RS_173] + Select Operator [SEL_172] (rows=1 width=240) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_164] (rows=1 width=232) + Group By Operator [GBY_171] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_163] - Group By Operator [GBY_162] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_170] + Group By Operator [GBY_169] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_161] (rows=2511437 width=228) + Group By Operator [GBY_168] (rows=2511437 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_69] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=2511437 width=228) + Group By Operator [GBY_72] (rows=2511437 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_37] (rows=5022875 width=230) + Select Operator [SEL_41] (rows=5022875 width=229) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_36] (rows=5022875 width=230) + Filter Operator [FIL_40] (rows=5022875 width=229) predicate:_col14 is null - Merge Join Operator [MERGEJOIN_125] (rows=14054072 width=230) - Conds:RS_33._col4=RS_160._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + Merge Join Operator [MERGEJOIN_129] (rows=10045750 width=229) + Conds:RS_37._col4=RS_167._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_167] PartitionCols:_col0 - Select Operator [SEL_159] (rows=14398467 width=8) + Select Operator [SEL_166] (rows=8007986 width=8) Output:["_col0","_col1"] - TableScan [TS_25] (rows=14398467 width=4) - default@web_returns,wr1,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] + Group By Operator [GBY_165] (rows=8007986 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] + PartitionCols:_col0 + Group By Operator [GBY_163] (rows=14398467 width=4) + Output:["_col0"],keys:wr_order_number + TableScan [TS_25] (rows=14398467 width=4) + default@web_returns,wr1,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] <-Reducer 5 [ONE_TO_ONE_EDGE] - FORWARD [RS_33] + FORWARD [RS_37] PartitionCols:_col4 - Select Operator [SEL_32] (rows=5022875 width=231) + Select Operator [SEL_36] (rows=5022875 width=231) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_124] (rows=5022875 width=235) - Conds:RS_29._col4=RS_158._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + Merge Join Operator [MERGEJOIN_128] (rows=5022875 width=235) + Conds:RS_33._col4=RS_162._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_29] + SHUFFLE [RS_33] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_123] (rows=5022875 width=231) - Conds:RS_18._col2=RS_144._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_127] (rows=5022875 width=231) + Conds:RS_18._col2=RS_148._col0(Inner),Output:["_col3","_col4","_col5","_col6"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_143] (rows=12 width=91) + Select Operator [SEL_147] (rows=12 width=91) Output:["_col0"] - Filter Operator [FIL_142] (rows=12 width=92) + Filter Operator [FIL_146] (rows=12 width=92) predicate:(web_company_name = 'pri') TableScan [TS_9] (rows=84 width=92) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_122] (rows=15673790 width=235) - Conds:RS_15._col1=RS_136._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_126] (rows=15673790 width=235) + Conds:RS_15._col1=RS_140._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_135] (rows=784314 width=90) + Select Operator [SEL_139] (rows=784314 width=90) Output:["_col0"] - Filter Operator [FIL_134] (rows=784314 width=90) + Filter Operator [FIL_138] (rows=784314 width=90) predicate:(ca_state = 'TX') TableScan [TS_6] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=15987241 width=239) - Conds:RS_152._col0=RS_128._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_125] (rows=15987241 width=239) + Conds:RS_156._col0=RS_132._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + SHUFFLE [RS_132] PartitionCols:_col0 - Select Operator [SEL_127] (rows=8116 width=98) + Select Operator [SEL_131] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_126] (rows=8116 width=98) + Filter Operator [FIL_130] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_156] PartitionCols:_col0 - Select Operator [SEL_151] (rows=143895019 width=243) + Select Operator [SEL_155] (rows=143895019 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_150] (rows=143895019 width=243) + Filter Operator [FIL_154] (rows=143895019 width=243) predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_19_web_site_web_site_sk_min) AND DynamicValue(RS_19_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_19_web_site_web_site_sk_bloom_filter))) and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) TableScan [TS_0] (rows=144002668 width=243) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_133] - Group By Operator [GBY_132] (rows=1 width=12) + BROADCAST [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_129] (rows=8116 width=4) + Select Operator [SEL_133] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_127] + Please refer to the previous Select Operator [SEL_131] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_141] - Group By Operator [GBY_140] (rows=1 width=12) + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] - Group By Operator [GBY_138] (rows=1 width=12) + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_137] (rows=784314 width=4) + Select Operator [SEL_141] (rows=784314 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_135] + Please refer to the previous Select Operator [SEL_139] <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_149] - Group By Operator [GBY_148] (rows=1 width=12) + BROADCAST [RS_153] + Group By Operator [GBY_152] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] - Group By Operator [GBY_146] (rows=1 width=12) + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_145] (rows=12 width=4) + Select Operator [SEL_149] (rows=12 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_143] + Please refer to the previous Select Operator [SEL_147] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_158] + SHUFFLE [RS_162] PartitionCols:_col0 - Group By Operator [GBY_157] (rows=143966743 width=7) + Group By Operator [GBY_161] (rows=143966743 width=7) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_156] (rows=143966743 width=7) + Select Operator [SEL_160] (rows=143966743 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=143966743 width=7) - predicate:((ws_order_number BETWEEN DynamicValue(RS_29_ws1_ws_order_number_min) AND DynamicValue(RS_29_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_29_ws1_ws_order_number_bloom_filter))) and ws_warehouse_sk is not null) + Filter Operator [FIL_159] (rows=143966743 width=7) + predicate:((ws_order_number BETWEEN DynamicValue(RS_33_ws1_ws_order_number_min) AND DynamicValue(RS_33_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_33_ws1_ws_order_number_bloom_filter))) and ws_warehouse_sk is not null) TableScan [TS_22] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_154] - Group By Operator [GBY_153] (rows=1 width=12) + BROADCAST [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) + SHUFFLE [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=5022875 width=8) + Select Operator [SEL_113] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_123] + Please refer to the previous Merge Join Operator [MERGEJOIN_127] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out index 4dc9cd8cc0..43263b1bb6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out @@ -75,20 +75,26 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) -Map 17 <- Reducer 10 (BROADCAST_EDGE) -Map 20 <- Reducer 10 (BROADCAST_EDGE) -Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (ONE_TO_ONE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Map 1 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 18 <- Reducer 11 (BROADCAST_EDGE) +Map 21 <- Reducer 11 (BROADCAST_EDGE) +Map 22 <- Reducer 10 (BROADCAST_EDGE) +Map 26 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 24 <- Map 27 (SIMPLE_EDGE), Reducer 23 (ONE_TO_ONE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE) +Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 20 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 25 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -98,174 +104,223 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_273] - Limit [LIM_272] (rows=1 width=240) + File Output Operator [FS_294] + Limit [LIM_293] (rows=1 width=240) Number of rows:100 - Select Operator [SEL_271] (rows=1 width=240) + Select Operator [SEL_292] (rows=1 width=240) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_270] - Select Operator [SEL_269] (rows=1 width=240) + SHUFFLE [RS_291] + Select Operator [SEL_290] (rows=1 width=240) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_268] (rows=1 width=232) + Group By Operator [GBY_289] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_267] - Group By Operator [GBY_266] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_288] + Group By Operator [GBY_287] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_265] (rows=38111880083 width=228) + Group By Operator [GBY_286] (rows=2511437 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_101] + SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_100] (rows=38111880083 width=228) + Group By Operator [GBY_108] (rows=2511437 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_227] (rows=83469759007 width=227) - Conds:RS_47._col3=RS_48._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 19 [ONE_TO_ONE_EDGE] - FORWARD [RS_48] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=1384229738 width=4) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_221] (rows=1384229738 width=4) - Conds:RS_31._col0=RS_264.wr_order_number(Inner),Output:["_col14"] - <-Reducer 18 [ONE_TO_ONE_EDGE] - FORWARD [RS_31] - PartitionCols:_col0 - Select Operator [SEL_29] (rows=1411940834 width=4) - Output:["_col0"] - Filter Operator [FIL_28] (rows=1411940834 width=11) - predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_220] (rows=1411940834 width=11) - Conds:RS_260._col1=RS_263._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_260] - PartitionCols:_col1 - Select Operator [SEL_259] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_258] (rows=144002668 width=7) - predicate:(in_bloom_filter(ws_order_number, DynamicValue(RS_44_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_44_ws1_ws_order_number_min) AND DynamicValue(RS_44_ws1_ws_order_number_max)) - TableScan [TS_21] (rows=144002668 width=7) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_179] - Group By Operator [GBY_178] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_177] (rows=5022875 width=8) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_224] (rows=5022875 width=227) - Conds:RS_41._col2=RS_246._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_246] - PartitionCols:_col0 - Select Operator [SEL_245] (rows=12 width=4) - Output:["_col0"] - Filter Operator [FIL_244] (rows=12 width=92) - predicate:(web_company_name = 'pri') - TableScan [TS_9] (rows=84 width=92) - default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_223] (rows=15673790 width=231) - Conds:RS_38._col1=RS_238._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - PartitionCols:_col0 - Select Operator [SEL_237] (rows=784314 width=4) - Output:["_col0"] - Filter Operator [FIL_236] (rows=784314 width=90) - predicate:(ca_state = 'TX') - TableScan [TS_6] (rows=40000000 width=90) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_222] (rows=15987241 width=235) - Conds:RS_254._col0=RS_230._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] - PartitionCols:_col0 - Select Operator [SEL_229] (rows=8116 width=98) - Output:["_col0"] - Filter Operator [FIL_228] (rows=8116 width=98) - predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' - TableScan [TS_3] (rows=73049 width=98) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - PartitionCols:_col0 - Select Operator [SEL_253] (rows=143895019 width=239) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_252] (rows=143895019 width=239) - predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_39_customer_address_ca_address_sk_min) AND DynamicValue(RS_39_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_39_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_36_date_dim_d_date_sk_min) AND DynamicValue(RS_36_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_36_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_42_web_site_web_site_sk_min) AND DynamicValue(RS_42_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_42_web_site_web_site_sk_bloom_filter))) and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] (rows=144002668 width=239) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_235] - Group By Operator [GBY_234] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] - Group By Operator [GBY_232] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_231] (rows=8116 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_229] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_243] - Group By Operator [GBY_242] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] - Group By Operator [GBY_240] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_239] (rows=784314 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_237] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_251] - Group By Operator [GBY_250] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_249] - Group By Operator [GBY_248] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_247] (rows=12 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_245] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_263] - PartitionCols:_col1 - Select Operator [SEL_262] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_261] (rows=144002668 width=7) - predicate:(in_bloom_filter(ws_order_number, DynamicValue(RS_44_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_44_ws1_ws_order_number_min) AND DynamicValue(RS_44_ws1_ws_order_number_max)) - TableScan [TS_23] (rows=144002668 width=7) - default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_257] - Please refer to the previous Group By Operator [GBY_255] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_264] - PartitionCols:wr_order_number - TableScan [TS_30] (rows=14398467 width=4) - default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] + Merge Join Operator [MERGEJOIN_235] (rows=5022875 width=227) + Conds:RS_55._col3=RS_285._col0(Inner),Output:["_col3","_col4","_col5"] <-Reducer 5 [ONE_TO_ONE_EDGE] - FORWARD [RS_47] + FORWARD [RS_55] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_226] (rows=482885639 width=227) - Conds:RS_44._col3=RS_45._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 18 [ONE_TO_ONE_EDGE] - FORWARD [RS_45] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_29] + Merge Join Operator [MERGEJOIN_234] (rows=5022875 width=227) + Conds:RS_52._col3=RS_273._col0(Inner),Output:["_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] + SHUFFLE [RS_52] PartitionCols:_col3 - Please refer to the previous Merge Join Operator [MERGEJOIN_224] + Merge Join Operator [MERGEJOIN_232] (rows=5022875 width=227) + Conds:RS_49._col2=RS_254._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] + PartitionCols:_col0 + Select Operator [SEL_253] (rows=12 width=4) + Output:["_col0"] + Filter Operator [FIL_252] (rows=12 width=92) + predicate:(web_company_name = 'pri') + TableScan [TS_9] (rows=84 width=92) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_231] (rows=15673790 width=231) + Conds:RS_46._col1=RS_246._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + PartitionCols:_col0 + Select Operator [SEL_245] (rows=784314 width=4) + Output:["_col0"] + Filter Operator [FIL_244] (rows=784314 width=90) + predicate:(ca_state = 'TX') + TableScan [TS_6] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_230] (rows=15987241 width=235) + Conds:RS_262._col0=RS_238._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col0 + Select Operator [SEL_237] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_236] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_262] + PartitionCols:_col0 + Select Operator [SEL_261] (rows=143895019 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_260] (rows=143895019 width=239) + predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_47_customer_address_ca_address_sk_min) AND DynamicValue(RS_47_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_47_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_44_date_dim_d_date_sk_min) AND DynamicValue(RS_44_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_44_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_50_web_site_web_site_sk_min) AND DynamicValue(RS_50_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_50_web_site_web_site_sk_bloom_filter))) and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=239) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_243] + Group By Operator [GBY_242] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_239] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_237] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_251] + Group By Operator [GBY_250] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_249] + Group By Operator [GBY_248] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_247] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_245] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_259] + Group By Operator [GBY_258] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] + Group By Operator [GBY_256] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_255] (rows=12 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_253] + <-Reducer 20 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_273] + PartitionCols:_col0 + Group By Operator [GBY_272] (rows=14686712 width=4) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Group By Operator [GBY_21] (rows=144002668 width=4) + Output:["_col0"],keys:_col1 + Select Operator [SEL_20] (rows=1411940834 width=11) + Output:["_col1"] + Filter Operator [FIL_19] (rows=1411940834 width=11) + predicate:(_col0 <> _col2) + Merge Join Operator [MERGEJOIN_233] (rows=1411940834 width=11) + Conds:RS_268._col1=RS_271._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_268] + PartitionCols:_col1 + Select Operator [SEL_267] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_266] (rows=144002668 width=7) + predicate:(in_bloom_filter(ws_order_number, DynamicValue(RS_52_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_52_ws1_ws_order_number_min) AND DynamicValue(RS_52_ws1_ws_order_number_max)) + TableScan [TS_12] (rows=144002668 width=7) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_264] + Group By Operator [GBY_263] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_187] + Group By Operator [GBY_186] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_185] (rows=5022875 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_232] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_271] + PartitionCols:_col1 + Select Operator [SEL_270] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_269] (rows=144002668 width=7) + predicate:(in_bloom_filter(ws_order_number, DynamicValue(RS_52_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_52_ws1_ws_order_number_min) AND DynamicValue(RS_52_ws1_ws_order_number_max)) + TableScan [TS_14] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_265] + Please refer to the previous Group By Operator [GBY_263] + <-Reducer 25 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_285] + PartitionCols:_col0 + Group By Operator [GBY_284] (rows=8007986 width=4) + Output:["_col0"],keys:KEY._col0 + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Group By Operator [GBY_39] (rows=14398467 width=4) + Output:["_col0"],keys:_col14 + Merge Join Operator [MERGEJOIN_229] (rows=1384229738 width=4) + Conds:RS_35._col0=RS_283.wr_order_number(Inner),Output:["_col14"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_283] + PartitionCols:wr_order_number + TableScan [TS_34] (rows=14398467 width=4) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] + <-Reducer 23 [ONE_TO_ONE_EDGE] + FORWARD [RS_35] + PartitionCols:_col0 + Select Operator [SEL_33] (rows=1411940834 width=4) + Output:["_col0"] + Filter Operator [FIL_32] (rows=1411940834 width=11) + predicate:(_col0 <> _col2) + Merge Join Operator [MERGEJOIN_228] (rows=1411940834 width=11) + Conds:RS_279._col1=RS_282._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_279] + PartitionCols:_col1 + Select Operator [SEL_278] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_277] (rows=144002668 width=7) + predicate:(in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max)) + TableScan [TS_25] (rows=144002668 width=7) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_275] + Group By Operator [GBY_274] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_206] + Group By Operator [GBY_205] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_204] (rows=5022875 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_234] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] + PartitionCols:_col1 + Select Operator [SEL_281] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_280] (rows=144002668 width=7) + predicate:(in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max)) + TableScan [TS_27] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_276] + Please refer to the previous Group By Operator [GBY_274]