Estimated Per-Host Requirements: Memory=1.60GB VCores=4 WARNING: The following tables are missing relevant table and/or column statistics. events.enriched 38:EXCHANGE [UNPARTITIONED] | 29:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: destination_url = concat('thumbtack.com/', regexp_replace(regexp_replace(lpp_path, '\\.', '/'), '_', '-')) | |--37:EXCHANGE [BROADCAST] | | | 28:SCAN HDFS [default.lpp_landing_page_paths] | partitions=1/1 files=1 size=85.44MB | 27:SELECT | predicates: rank() = 1 | 26:ANALYTIC | functions: rank() | partition by: concat(url, destination_type) | order by: last_updated_time DESC, row_number() DESC | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | 25:SORT | order by: concat(lower(concat(domain, CASE WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/home(.htm(l)?)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/home(.php)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/index(.htm(l)?)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/index(.php)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '.php$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/.htm' THEN strleft(parse_url(raw_url, 'PATH'), locate('/.htm', parse_url(raw_url, 'PATH')) - 1) WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '.htm' THEN strleft(parse_url(raw_url, 'PATH'), locate('.htm', parse_url(raw_url, 'PATH')) - 1) WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '//$' THEN strleft(parse_url(raw_url, 'PATH'), char_length(parse_url(raw_url, 'PATH')) - 2) WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/$' THEN strleft(parse_url(raw_url, 'PATH'), char_length(parse_url(raw_url, 'PATH')) - 1) ELSE lower(parse_url(raw_url, 'PATH')) END)), destination_type) ASC NULLS FIRST, last_updated_time DESC, row_number() DESC | 36:EXCHANGE [HASH(concat(lower(concat(domain, CASE WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/home(.htm(l)?)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/home(.php)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/index(.htm(l)?)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/index(.php)?$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '.php$' THEN '' WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/.htm' THEN strleft(parse_url(raw_url, 'PATH'), locate('/.htm', parse_url(raw_url, 'PATH')) - 1) WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '.htm' THEN strleft(parse_url(raw_url, 'PATH'), locate('.htm', parse_url(raw_url, 'PATH')) - 1) WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '//$' THEN strleft(parse_url(raw_url, 'PATH'), char_length(parse_url(raw_url, 'PATH')) - 2) WHEN lower(parse_url(raw_url, 'PATH')) RLIKE '/$' THEN strleft(parse_url(raw_url, 'PATH'), char_length(parse_url(raw_url, 'PATH')) - 1) ELSE lower(parse_url(raw_url, 'PATH')) END)), destination_type))] | 24:ANALYTIC | functions: row_number() | order by: last_updated_time DESC | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | 35:MERGING-EXCHANGE [UNPARTITIONED] | order by: last_updated_time DESC | 23:SORT | order by: last_updated_time DESC | 00:UNION | |--01:SCAN HDFS [thunderdome.clk_crawled_links] | partitions=1/1 files=1 size=26.73MB | predicates: char_length(clk_url) > 0, char_length(clk_destination_url) > 0, thunderdome.clk_crawled_links.clk_url IS NOT NULL | 02:UNION | |--22:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: lpb_lpp_landing_page_path_id = lpp_landing_page_path_id | | | |--34:EXCHANGE [BROADCAST] | | | | | 20:SCAN HDFS [default.lpp_landing_page_paths] | | partitions=1/1 files=1 size=85.44MB | | | 21:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: service_id = lpb_sav_available_service_id | | | |--33:EXCHANGE [BROADCAST] | | | | | 19:SCAN HDFS [default.lpb_landing_page_best_pros] | | partitions=1/1 files=8 size=1.34MB | | predicates: lpb_landing_page_best_pro_id IS NOT NULL | | | 15:SUBPLAN | | | |--18:NESTED LOOP JOIN [CROSS JOIN] | | | | | |--16:SINGULAR ROW SRC | | | | | 17:UNNEST [events.enriched.kv_pairs] | | | 14:SCAN HDFS [events.enriched] | partitions=39/1469 files=1248 size=139.36GB | predicates: event_type LIKE 'widgets/added link to website', char_length(referer) > 0, events.enriched.referer IS NOT NULL | predicates on kv_pairs: kv_pairs.key = 'widget_id', kv_pairs.value RLIKE 'review(_widget)?' | 13:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: blg_lpp_landing_page_path_id = lpp_landing_page_path_id | |--32:EXCHANGE [BROADCAST] | | | 10:SCAN HDFS [default.lpp_landing_page_paths] | partitions=1/1 files=1 size=85.44MB | 12:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: service_id = blg_sav_available_service_id | |--31:EXCHANGE [BROADCAST] | | | 09:SCAN HDFS [default.blg_blog_posts] | partitions=1/1 files=1 size=15.34MB | 11:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: service_id = sur_sav_available_service_id | |--30:EXCHANGE [BROADCAST] | | | 08:SCAN HDFS [default.sur_service_urls] | partitions=1/1 files=1 size=52.78MB | 04:SUBPLAN | |--07:NESTED LOOP JOIN [CROSS JOIN] | | | |--05:SINGULAR ROW SRC | | | 06:UNNEST [events.enriched.kv_pairs] | 03:SCAN HDFS [events.enriched] partitions=39/1469 files=1248 size=139.36GB predicates: event_type LIKE 'widgets/added link to website', char_length(referer) > 0, events.enriched.referer IS NOT NULL predicates on kv_pairs: kv_pairs.key = 'widget_id'