Estimated Per-Host Requirements: Memory=5.07GB VCores=7 "" 96:EXCHANGE [UNPARTITIONED] | 53:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = user_uuid, dt = date_added_eastern | |--95:AGGREGATE [FINALIZE] | | output: count:merge(*) | | group by: user_uuid, date_added_eastern | | | 94:EXCHANGE [HASH(user_uuid,date_added_eastern)] | | | 51:AGGREGATE [STREAMING] | | output: count(*) | | group by: user_uuid, date_added_eastern | | | 93:AGGREGATE [FINALIZE] | | group by: user_uuid, state_id, date_added_eastern, last_updated_time, sku | | | 92:EXCHANGE [HASH(user_uuid,state_id,date_added_eastern,last_updated_time,sku)] | | | 50:AGGREGATE [STREAMING] | | group by: user_uuid, state_id, date_added_eastern, last_updated_time, sku | | | 46:UNION | | | |--48:SCAN HDFS [gwynniebee_closet.history_items] | | partitions=1/1 files=5 size=621.73MB | | | |--49:SCAN HDFS [gwynniebee_closet.deleted_items] | | partitions=1/1 files=7 size=1.26GB | | predicates: to_date(CAST(date_added AS TIMESTAMP)) != to_date(CAST(date_removed AS TIMESTAMP)) | | | 47:SCAN HDFS [gwynniebee_closet.active_items] | partitions=1/1 files=5 size=546.86MB | 52:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = customer_uuid, dt = session_date | |--91:AGGREGATE [FINALIZE] | | output: count:merge(*), sum:merge(if(ga_property_key = 1, 1, 0)), sum:merge(if(ga_property_key IN (3, 5), 1, 0)) | | group by: customer_uuid, session_date | | | 90:EXCHANGE [HASH(customer_uuid,session_date)] | | | 45:AGGREGATE [STREAMING] | | output: count(*), sum(if(ga_property_key = 1, 1, 0)), sum(if(ga_property_key IN (3, 5), 1, 0)) | | group by: customer_uuid, session_date | | | 89:AGGREGATE [FINALIZE] | | group by: session_date, customer_uuid, ga_property_key, total_visits | | | 88:EXCHANGE [HASH(session_date,customer_uuid,ga_property_key,total_visits)] | | | 44:AGGREGATE [STREAMING] | | group by: session_date, customer_uuid, ga_property_key, total_visits | | | 41:UNION | | | |--43:SCAN HDFS [caastle_bi.fact_ga_app_sessions ga] | | partitions=578/581 files=578 size=493.88MB | | predicates: ga_property_key IN (3, 5) | | | 42:SCAN HDFS [caastle_bi.fact_ga_browser_sessions ga] | partitions=883/886 files=883 size=3.69GB | predicates: ga_property_key = 1 | 40:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: customer_subscription_session = cu.customer_subscription_session | |--87:EXCHANGE [BROADCAST] | | | 86:AGGREGATE [FINALIZE] | | output: min:merge(CAST(scd_start AS TIMESTAMP)), max:merge(CASE WHEN scd_end IS NULL THEN now() ELSE CAST(scd_end AS TIMESTAMP) END) | | group by: cu.customer_uuid, cu.customer_subscription_session | | | 85:EXCHANGE [HASH(cu.customer_uuid,cu.customer_subscription_session)] | | | 38:AGGREGATE [STREAMING] | | output: min(CAST(scd_start AS TIMESTAMP)), max(CASE WHEN scd_end IS NULL THEN now() ELSE CAST(scd_end AS TIMESTAMP) END) | | group by: cu.customer_uuid, cu.customer_subscription_session | | | 37:SCAN HDFS [caastle_bi.dim_customer cu] | partitions=2/7 files=25 size=719.09MB | predicates: cu.customer_type_key = 0 | 39:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: customer_subscription_session = cu.customer_subscription_session | |--84:EXCHANGE [BROADCAST] | | | 83:AGGREGATE [FINALIZE] | | output: min:merge(CAST(regexp_replace(CAST(cu.customer_subscription_date_key AS STRING), '(\\d{4})(\\d{2})(\\d{2})', '\\1-\\2-\\3') AS TIMESTAMP) + INTERVAL CAST(substring(cu.customer_active_start_time_key, 1, 2) AS INT) hour + INTERVAL CAST(substring(cu.customer_active_start_time_key, 4, 2) AS INT) minute) | | group by: cu.customer_uuid, cu.customer_subscription_session | | | 82:EXCHANGE [HASH(cu.customer_uuid,cu.customer_subscription_session)] | | | 36:AGGREGATE [STREAMING] | | output: min(CAST(regexp_replace(CAST(cu.customer_subscription_date_key AS STRING), '(\\d{4})(\\d{2})(\\d{2})', '\\1-\\2-\\3') AS TIMESTAMP) + INTERVAL CAST(substring(cu.customer_active_start_time_key, 1, 2) AS INT) hour + INTERVAL CAST(substring(cu.customer_active_start_time_key, 4, 2) AS INT) minute) | | group by: cu.customer_uuid, cu.customer_subscription_session | | | 35:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: cu.customer_subscription_session = b.customer_subscription_session | | other predicates: b.duration_in_hours IS NULL | | | |--81:EXCHANGE [BROADCAST] | | | | | 34:SCAN HDFS [caastle_bi.customers_lt24 b] | | partitions=1/1 files=1 size=1.20MB | | | 33:SCAN HDFS [caastle_bi.dim_customer cu] | partitions=3/7 files=27 size=1.19GB | predicates: cu.customer_type_key = 0 | 32:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = customer_uuid, dt = to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) | other join predicates: '2016-06-20' <= dt | |--80:EXCHANGE [HASH(customer_uuid,to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | | | 78:AGGREGATE [FINALIZE] | | group by: customer_uuid, s.segment_name, s.segment_id, '% Worn', '2016-06-20', to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) | | having: to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 2 day) | | | 77:EXCHANGE [HASH(customer_uuid,s.segment_name,s.segment_id,'% Worn','2016-06-20',to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | | | 30:AGGREGATE [STREAMING] | | group by: customer_uuid, s.segment_name, s.segment_id, '% Worn', '2016-06-20', to_date(CAST(to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) AS TIMESTAMP) - INTERVAL 1 day) | | | 29:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: cs.segment_instance_key = max(i.segment_instance_key) | | runtime filters: RF006 <- max(i.segment_instance_key) | | | |--76:EXCHANGE [BROADCAST] | | | | | 75:AGGREGATE [FINALIZE] | | | output: max:merge(i.segment_instance_key) | | | group by: segment_id, to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) | | | having: to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 1 day) | | | | | 74:EXCHANGE [HASH(segment_id,to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')))] | | | | | 26:AGGREGATE [STREAMING] | | | output: max(i.segment_instance_key) | | | group by: segment_id, to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) | | | | | 25:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: cs.segment_instance_key = i.segment_instance_key | | | runtime filters: RF009 <- i.segment_instance_key | | | | | |--73:EXCHANGE [BROADCAST] | | | | | | | 23:SCAN HDFS [caastle_bi.dim_segment_instance i] | | | partitions=1/1 files=1 size=8.26MB | | | predicates: segment_instance_status = 'Refreshed' | | | | | 24:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: cs.segment_key = s.segment_key | | | runtime filters: RF010 <- s.segment_key | | | | | |--72:EXCHANGE [BROADCAST] | | | | | | | 22:SCAN HDFS [caastle_bi.dim_segment s] | | | partitions=1/1 files=2 size=88.09MB | | | predicates: segment_id IN (657, 658, 659, 660, 661) | | | | | 21:SCAN HDFS [caastle_bi.fact_customer_segments cs] | | partitions=58726/58866 files=58726 size=18.96GB | | runtime filters: RF010 -> cs.segment_key, RF009 -> cs.segment_instance_key | | | 28:HASH JOIN [INNER JOIN, PARTITIONED] | | hash predicates: cs.customer_key = cu.customer_key | | runtime filters: RF007 <- cu.customer_key | | | |--71:EXCHANGE [HASH(cu.customer_key)] | | | | | 19:SCAN HDFS [caastle_bi.dim_customer cu] | | partitions=7/7 files=55 size=2.74GB | | | 70:EXCHANGE [HASH(cs.customer_key)] | | | 27:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: cs.segment_key = s.segment_key | | runtime filters: RF008 <- s.segment_key | | | |--69:EXCHANGE [BROADCAST] | | | | | 20:SCAN HDFS [caastle_bi.dim_segment s] | | partitions=1/1 files=2 size=88.09MB | | predicates: s.segment_id IN (657, 658, 659, 660, 661) | | | 18:SCAN HDFS [caastle_bi.fact_customer_segments cs] | partitions=58726/58866 files=58726 size=18.96GB | runtime filters: RF008 -> cs.segment_key, RF006 -> cs.segment_instance_key, RF007 -> cs.customer_key | 79:EXCHANGE [HASH(customer_uuid,dt)] | 31:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = customer_uuid, to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) = dt | other join predicates: '2016-05-10' <= dt | |--68:EXCHANGE [HASH(customer_uuid,dt)] | | | 04:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | | hash predicates: customer_uuid = dim_customer.customer_uuid | | other predicates: dim_customer.customer_type_key = 0 | | | |--66:EXCHANGE [HASH(dim_customer.customer_uuid)] | | | | | 02:SCAN HDFS [caastle_bi.dim_customer] | | partitions=7/7 files=55 size=2.74GB | | predicates: caastle_bi.dim_customer.customer_type_key = 0, dim_customer.scd_end IS NULL | | | 65:EXCHANGE [HASH(customer_uuid)] | | | 03:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: customer_subscription_session = l24.customer_subscription_session | | other predicates: l24.duration_in_hours IS NULL | | | |--64:EXCHANGE [BROADCAST] | | | | | 01:SCAN HDFS [caastle_bi.customers_lt24 l24] | | partitions=1/1 files=1 size=1.20MB | | | 00:SCAN HDFS [gwynniebee_bi_dgt.summary_customer] | partitions=2855/7153 files=5765 size=4.29GB | predicates: dt >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), dt <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 2 day) | 67:EXCHANGE [HASH(customer_uuid,to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | 63:AGGREGATE [FINALIZE] | group by: customer_uuid, s.segment_name, s.segment_id, 'Closet Rate', '2016-05-10', to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) | having: to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 2 day) | 62:EXCHANGE [HASH(customer_uuid,s.segment_name,s.segment_id,'Closet Rate','2016-05-10',to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | 17:AGGREGATE [STREAMING] | group by: customer_uuid, s.segment_name, s.segment_id, 'Closet Rate', '2016-05-10', to_date(CAST(to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) AS TIMESTAMP) - INTERVAL 1 day) | 16:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: cs.segment_instance_key = max(i.segment_instance_key) | runtime filters: RF001 <- max(i.segment_instance_key) | |--61:EXCHANGE [BROADCAST] | | | 60:AGGREGATE [FINALIZE] | | output: max:merge(i.segment_instance_key) | | group by: segment_id, to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) | | having: to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 1 day) | | | 59:EXCHANGE [HASH(segment_id,to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')))] | | | 13:AGGREGATE [STREAMING] | | output: max(i.segment_instance_key) | | group by: segment_id, to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) | | | 12:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: cs.segment_instance_key = i.segment_instance_key | | runtime filters: RF004 <- i.segment_instance_key | | | |--58:EXCHANGE [BROADCAST] | | | | | 10:SCAN HDFS [caastle_bi.dim_segment_instance i] | | partitions=1/1 files=1 size=8.26MB | | predicates: segment_instance_status = 'Refreshed' | | | 11:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: cs.segment_key = s.segment_key | | runtime filters: RF005 <- s.segment_key | | | |--57:EXCHANGE [BROADCAST] | | | | | 09:SCAN HDFS [caastle_bi.dim_segment s] | | partitions=1/1 files=2 size=88.09MB | | predicates: segment_id IN (490, 491, 492, 493) | | | 08:SCAN HDFS [caastle_bi.fact_customer_segments cs] | partitions=58726/58866 files=58726 size=18.96GB | runtime filters: RF005 -> cs.segment_key, RF004 -> cs.segment_instance_key | 15:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: cs.customer_key = cu.customer_key | runtime filters: RF002 <- cu.customer_key | |--56:EXCHANGE [HASH(cu.customer_key)] | | | 06:SCAN HDFS [caastle_bi.dim_customer cu] | partitions=7/7 files=55 size=2.74GB | 55:EXCHANGE [HASH(cs.customer_key)] | 14:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: cs.segment_key = s.segment_key | runtime filters: RF003 <- s.segment_key | |--54:EXCHANGE [BROADCAST] | | | 07:SCAN HDFS [caastle_bi.dim_segment s] | partitions=1/1 files=2 size=88.09MB | predicates: s.segment_id IN (490, 491, 492, 493) | 05:SCAN HDFS [caastle_bi.fact_customer_segments cs] partitions=58726/58866 files=58726 size=18.96GB runtime filters: RF003 -> cs.segment_key, RF001 -> cs.segment_instance_key, RF002 -> cs.customer_key