Description
Rewrite join condition to conjunctive normal form to push more conditions to filter.
PostgreSQL:
CREATE TABLE lineitem (l_orderkey BIGINT, l_partkey BIGINT, l_suppkey BIGINT, l_linenumber INT, l_quantity DECIMAL(10,0), l_extendedprice DECIMAL(10,0), l_discount DECIMAL(10,0), l_tax DECIMAL(10,0), l_returnflag varchar(255), l_linestatus varchar(255), l_shipdate DATE, l_commitdate DATE, l_receiptdate DATE, l_shipinstruct varchar(255), l_shipmode varchar(255), l_comment varchar(255)); CREATE TABLE orders ( o_orderkey BIGINT, o_custkey BIGINT, o_orderstatus varchar(255), o_totalprice DECIMAL(10,0), o_orderdate DATE, o_orderpriority varchar(255), o_clerk varchar(255), o_shippriority INT, o_comment varchar(255)); EXPLAIN SELECT Count(*) FROM lineitem, orders WHERE l_orderkey = o_orderkey AND ( ( l_suppkey > 3 AND o_custkey > 13 ) OR ( l_suppkey > 1 AND o_custkey > 11 ) ) AND l_partkey > 19; EXPLAIN SELECT Count(*) FROM lineitem JOIN orders ON l_orderkey = o_orderkey AND ( ( l_suppkey > 3 AND o_custkey > 13 ) OR ( l_suppkey > 1 AND o_custkey > 11 ) ) AND l_partkey > 19; EXPLAIN SELECT Count(*) FROM lineitem, orders WHERE l_orderkey = o_orderkey AND NOT ( ( l_suppkey > 3 AND ( l_suppkey > 2 OR o_custkey > 13 ) ) OR ( l_suppkey > 1 AND o_custkey > 11 ) ) AND l_partkey > 19;
postgres=# EXPLAIN postgres-# SELECT Count(*) postgres-# FROM lineitem, postgres-# orders postgres-# WHERE l_orderkey = o_orderkey postgres-# AND ( ( l_suppkey > 3 postgres(# AND o_custkey > 13 ) postgres(# OR ( l_suppkey > 1 postgres(# AND o_custkey > 11 ) ) postgres-# AND l_partkey > 19; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------- Aggregate (cost=21.18..21.19 rows=1 width=8) -> Hash Join (cost=10.60..21.17 rows=2 width=0) Hash Cond: (orders.o_orderkey = lineitem.l_orderkey) Join Filter: (((lineitem.l_suppkey > 3) AND (orders.o_custkey > 13)) OR ((lineitem.l_suppkey > 1) AND (orders.o_custkey > 11))) -> Seq Scan on orders (cost=0.00..10.45 rows=17 width=16) Filter: ((o_custkey > 13) OR (o_custkey > 11)) -> Hash (cost=10.53..10.53 rows=6 width=16) -> Seq Scan on lineitem (cost=0.00..10.53 rows=6 width=16) Filter: ((l_partkey > 19) AND ((l_suppkey > 3) OR (l_suppkey > 1))) (9 rows) postgres=# EXPLAIN postgres-# SELECT Count(*) postgres-# FROM lineitem postgres-# JOIN orders postgres-# ON l_orderkey = o_orderkey postgres-# AND ( ( l_suppkey > 3 postgres(# AND o_custkey > 13 ) postgres(# OR ( l_suppkey > 1 postgres(# AND o_custkey > 11 ) ) postgres-# AND l_partkey > 19; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------- Aggregate (cost=21.18..21.19 rows=1 width=8) -> Hash Join (cost=10.60..21.17 rows=2 width=0) Hash Cond: (orders.o_orderkey = lineitem.l_orderkey) Join Filter: (((lineitem.l_suppkey > 3) AND (orders.o_custkey > 13)) OR ((lineitem.l_suppkey > 1) AND (orders.o_custkey > 11))) -> Seq Scan on orders (cost=0.00..10.45 rows=17 width=16) Filter: ((o_custkey > 13) OR (o_custkey > 11)) -> Hash (cost=10.53..10.53 rows=6 width=16) -> Seq Scan on lineitem (cost=0.00..10.53 rows=6 width=16) Filter: ((l_partkey > 19) AND ((l_suppkey > 3) OR (l_suppkey > 1))) (9 rows) postgres=# EXPLAIN postgres-# SELECT Count(*) postgres-# FROM lineitem, postgres-# orders postgres-# WHERE l_orderkey = o_orderkey postgres-# AND NOT ( ( l_suppkey > 3 postgres(# AND ( l_suppkey > 2 postgres(# OR o_custkey > 13 ) ) postgres(# OR ( l_suppkey > 1 postgres(# AND o_custkey > 11 ) ) postgres-# AND l_partkey > 19; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Aggregate (cost=21.15..21.16 rows=1 width=8) -> Hash Join (cost=10.60..21.15 rows=2 width=0) Hash Cond: (orders.o_orderkey = lineitem.l_orderkey) Join Filter: (((lineitem.l_suppkey <= 1) OR (orders.o_custkey <= 11)) AND ((lineitem.l_suppkey <= 3) OR ((lineitem.l_suppkey <= 2) AND (orders.o_custkey <= 13)))) -> Seq Scan on orders (cost=0.00..10.30 rows=30 width=16) -> Hash (cost=10.53..10.53 rows=6 width=16) -> Seq Scan on lineitem (cost=0.00..10.53 rows=6 width=16) Filter: ((l_partkey > 19) AND ((l_suppkey <= 3) OR (l_suppkey <= 2))) (8 rows)
https://docs.teradata.com/reader/i_VlYHwN0b8knh6AEWrv1Q/Bh~37Qcc2~24P_jn2~0w6w
Attachments
Issue Links
- is duplicated by
-
SPARK-27280 infer filters from Join's OR condition
- Resolved
- is related to
-
SPARK-6624 Convert filters into CNF for data sources
- Resolved
-
SPARK-17357 Simplified predicates can't be pushed down through operators because of the rule order in Optimizer
- Resolved
-
SPARK-27280 infer filters from Join's OR condition
- Resolved
- links to