Estimated Per-Host Requirements: Memory=8.01GB VCores=9 "" 147:EXCHANGE [UNPARTITIONED] | 87:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = user_uuid, dt = date_added_eastern | |--146:AGGREGATE [FINALIZE] | | output: count:merge(*) | | group by: user_uuid, date_added_eastern | | | 145:EXCHANGE [HASH(user_uuid,date_added_eastern)] | | | 85:AGGREGATE [STREAMING] | | output: count(*) | | group by: user_uuid, date_added_eastern | | | 144:AGGREGATE [FINALIZE] | | group by: user_uuid, state_id, date_added_eastern, last_updated_time, sku | | | 143:EXCHANGE [HASH(user_uuid,state_id,date_added_eastern,last_updated_time,sku)] | | | 84:AGGREGATE [STREAMING] | | group by: user_uuid, state_id, date_added_eastern, last_updated_time, sku | | | 80:UNION | | | |--82:SCAN HDFS [gwynniebee_closet.history_items] | | partitions=1/1 files=5 size=621.73MB | | | |--83:SCAN HDFS [gwynniebee_closet.deleted_items] | | partitions=1/1 files=7 size=1.26GB | | predicates: to_date(CAST(date_added AS TIMESTAMP)) != to_date(CAST(date_removed AS TIMESTAMP)) | | | 81:SCAN HDFS [gwynniebee_closet.active_items] | partitions=1/1 files=5 size=546.86MB | 86:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = customer_uuid, dt = session_date | |--142:AGGREGATE [FINALIZE] | | output: count:merge(*), sum:merge(if(ga_property_key = 1, 1, 0)), sum:merge(if(ga_property_key IN (3, 5), 1, 0)) | | group by: customer_uuid, session_date | | | 141:EXCHANGE [HASH(customer_uuid,session_date)] | | | 79:AGGREGATE [STREAMING] | | output: count(*), sum(if(ga_property_key = 1, 1, 0)), sum(if(ga_property_key IN (3, 5), 1, 0)) | | group by: customer_uuid, session_date | | | 140:AGGREGATE [FINALIZE] | | group by: session_date, customer_uuid, ga_property_key, total_visits | | | 139:EXCHANGE [HASH(session_date,customer_uuid,ga_property_key,total_visits)] | | | 78:AGGREGATE [STREAMING] | | group by: session_date, customer_uuid, ga_property_key, total_visits | | | 75:UNION | | | |--77:SCAN HDFS [caastle_bi.fact_ga_app_sessions a] | | partitions=578/581 files=578 size=493.88MB | | predicates: a.ga_property_key IN (3, 5) | | | 76:SCAN HDFS [caastle_bi.fact_ga_browser_sessions a] | partitions=883/886 files=883 size=3.69GB | predicates: a.ga_property_key = 1 | 74:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: customer_subscription_session = cu.customer_subscription_session | |--138:EXCHANGE [BROADCAST] | | | 137:AGGREGATE [FINALIZE] | | output: min:merge(CAST(scd_start AS TIMESTAMP)), max:merge(CASE WHEN scd_end IS NULL THEN now() ELSE CAST(scd_end AS TIMESTAMP) END) | | group by: cu.customer_uuid, cu.customer_subscription_session | | | 136:EXCHANGE [HASH(cu.customer_uuid,cu.customer_subscription_session)] | | | 72:AGGREGATE [STREAMING] | | output: min(CAST(a.scd_start AS TIMESTAMP)), max(CASE WHEN a.scd_end IS NULL THEN now() ELSE CAST(a.scd_end AS TIMESTAMP) END) | | group by: a.customer_uuid, a.customer_subscription_session | | | 71:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: a.tenant_key = b.tenant_key | | | |--135:EXCHANGE [BROADCAST] | | | | | 70:SCAN HDFS [caastle_bi.dim_tenant b] | | partitions=1/1 files=1 size=3.53KB | | predicates: tenant_id = 0 | | | 69:SCAN HDFS [caastle_bi.dim_customer a] | partitions=2/7 files=25 size=719.09MB | predicates: a.customer_type_key = 0 | 73:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: customer_subscription_session = cu.customer_subscription_session | |--134:EXCHANGE [BROADCAST] | | | 133:AGGREGATE [FINALIZE] | | output: min:merge(CAST(regexp_replace(CAST(cu.customer_subscription_date_key AS STRING), '(\\d{4})(\\d{2})(\\d{2})', '\\1-\\2-\\3') AS TIMESTAMP) + INTERVAL CAST(substring(cu.customer_active_start_time_key, 1, 2) AS INT) hour + INTERVAL CAST(substring(cu.customer_active_start_time_key, 4, 2) AS INT) minute) | | group by: cu.customer_uuid, cu.customer_subscription_session | | | 132:EXCHANGE [HASH(cu.customer_uuid,cu.customer_subscription_session)] | | | 68:AGGREGATE [STREAMING] | | output: min(CAST(regexp_replace(CAST(a.customer_subscription_date_key AS STRING), '(\\d{4})(\\d{2})(\\d{2})', '\\1-\\2-\\3') AS TIMESTAMP) + INTERVAL CAST(substring(a.customer_active_start_time_key, 1, 2) AS INT) hour + INTERVAL CAST(substring(a.customer_active_start_time_key, 4, 2) AS INT) minute) | | group by: a.customer_uuid, a.customer_subscription_session | | | 67:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: a.customer_subscription_session = a.customer_subscription_session | | other predicates: a.duration_in_hours IS NULL | | | |--131:EXCHANGE [BROADCAST] | | | | | 66:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | | hash predicates: b.tenant_key = c.tenant_key | | | other predicates: (c.tenant_id = 0 OR c.tenant_id IS NULL) | | | | | |--130:EXCHANGE [BROADCAST] | | | | | | | 64:SCAN HDFS [caastle_bi.dim_tenant c] | | | partitions=1/1 files=1 size=3.53KB | | | | | 65:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED] | | | hash predicates: b.customer_uuid = a.customer_uuid | | | other predicates: b.scd_end IS NULL | | | runtime filters: RF015 <- a.customer_uuid | | | | | |--129:EXCHANGE [HASH(a.customer_uuid)] | | | | | | | 62:SCAN HDFS [caastle_bi.customers_lt24 a] | | | partitions=1/1 files=1 size=1.20MB | | | | | 128:EXCHANGE [HASH(b.customer_uuid)] | | | | | 63:SCAN HDFS [caastle_bi.dim_customer b] | | partitions=7/7 files=55 size=2.74GB | | runtime filters: RF015 -> b.customer_uuid | | | 61:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: a.tenant_key = b.tenant_key | | | |--127:EXCHANGE [BROADCAST] | | | | | 60:SCAN HDFS [caastle_bi.dim_tenant b] | | partitions=1/1 files=1 size=3.53KB | | predicates: tenant_id = 0 | | | 59:SCAN HDFS [caastle_bi.dim_customer a] | partitions=3/7 files=27 size=1.19GB | predicates: a.customer_type_key = 0 | 58:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = customer_uuid, dt = to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) | other join predicates: '2016-06-20' <= dt | |--126:EXCHANGE [HASH(customer_uuid,to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | | | 125:AGGREGATE [FINALIZE] | | group by: customer_uuid, s.segment_name, s.segment_id, '% Worn', '2016-06-20', to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) | | having: to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 2 day) | | | 124:EXCHANGE [HASH(customer_uuid,s.segment_name,s.segment_id,'% Worn','2016-06-20',to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | | | 56:AGGREGATE [STREAMING] | | group by: a.customer_uuid, a.segment_name, a.segment_id, '% Worn', '2016-06-20', to_date(CAST(to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) AS TIMESTAMP) - INTERVAL 1 day) | | | 55:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: a.segment_instance_key = max(i.segment_instance_key) | | | |--123:EXCHANGE [BROADCAST] | | | | | 122:AGGREGATE [FINALIZE] | | | output: max:merge(i.segment_instance_key) | | | group by: segment_id, to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) | | | having: to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 1 day) | | | | | 121:EXCHANGE [HASH(segment_id,to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')))] | | | | | 52:AGGREGATE [STREAMING] | | | output: max(a.segment_instance_key) | | | group by: a.segment_id, to_date(from_utc_timestamp(a.segment_instance_created_at, 'EDT')) | | | | | 51:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: a.segment_instance_key = a.segment_instance_key | | | runtime filters: RF012 <- a.segment_instance_key | | | | | |--120:EXCHANGE [BROADCAST] | | | | | | | 49:SCAN HDFS [caastle_bi.dim_segment_instance a] | | | partitions=1/1 files=1 size=8.26MB | | | predicates: a.segment_instance_status = 'Refreshed' | | | | | 50:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: a.segment_key = a.segment_key | | | runtime filters: RF013 <- a.segment_key | | | | | |--119:EXCHANGE [BROADCAST] | | | | | | | 48:SCAN HDFS [caastle_bi.dim_segment a] | | | partitions=1/1 files=2 size=88.09MB | | | predicates: a.segment_id IN (657, 658, 659, 660, 661) | | | | | 47:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | | hash predicates: b.tenant_key = c.tenant_key | | | other predicates: (c.tenant_id = 0 OR c.tenant_id IS NULL) | | | | | |--118:EXCHANGE [BROADCAST] | | | | | | | 45:SCAN HDFS [caastle_bi.dim_tenant c] | | | partitions=1/1 files=1 size=3.53KB | | | | | 46:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | | hash predicates: a.customer_key = b.customer_key | | | | | |--117:EXCHANGE [BROADCAST] | | | | | | | 44:SCAN HDFS [caastle_bi.dim_customer b] | | | partitions=7/7 files=55 size=2.74GB | | | | | 43:SCAN HDFS [caastle_bi.fact_customer_segments a] | | partitions=58726/58866 files=58726 size=18.96GB | | runtime filters: RF013 -> a.segment_key, RF012 -> a.segment_instance_key | | | 54:HASH JOIN [INNER JOIN, PARTITIONED] | | hash predicates: a.customer_key = a.customer_key | | | |--116:EXCHANGE [HASH(a.customer_key)] | | | | | 41:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: a.tenant_key = b.tenant_key | | | | | |--114:EXCHANGE [BROADCAST] | | | | | | | 40:SCAN HDFS [caastle_bi.dim_tenant b] | | | partitions=1/1 files=1 size=3.53KB | | | predicates: tenant_id = 0 | | | | | 39:SCAN HDFS [caastle_bi.dim_customer a] | | partitions=7/7 files=55 size=2.74GB | | | 115:EXCHANGE [HASH(a.customer_key)] | | | 53:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: a.segment_key = a.segment_key | | runtime filters: RF010 <- a.segment_key | | | |--113:EXCHANGE [BROADCAST] | | | | | 42:SCAN HDFS [caastle_bi.dim_segment a] | | partitions=1/1 files=2 size=88.09MB | | predicates: a.segment_id IN (657, 658, 659, 660, 661) | | | 38:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: b.tenant_key = c.tenant_key | | other predicates: (c.tenant_id = 0 OR c.tenant_id IS NULL) | | | |--112:EXCHANGE [BROADCAST] | | | | | 36:SCAN HDFS [caastle_bi.dim_tenant c] | | partitions=1/1 files=1 size=3.53KB | | | 37:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: a.customer_key = b.customer_key | | | |--111:EXCHANGE [BROADCAST] | | | | | 35:SCAN HDFS [caastle_bi.dim_customer b] | | partitions=7/7 files=55 size=2.74GB | | | 34:SCAN HDFS [caastle_bi.fact_customer_segments a] | partitions=58726/58866 files=58726 size=18.96GB | runtime filters: RF010 -> a.segment_key | 57:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] | hash predicates: customer_uuid = customer_uuid, dt = to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) | other join predicates: '2016-05-10' <= dt | |--110:EXCHANGE [HASH(customer_uuid,to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | | | 108:AGGREGATE [FINALIZE] | | group by: customer_uuid, s.segment_name, s.segment_id, 'Closet Rate', '2016-05-10', to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) | | having: to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 2 day) | | | 107:EXCHANGE [HASH(customer_uuid,s.segment_name,s.segment_id,'Closet Rate','2016-05-10',to_date(CAST(segment_instance_create_date_eastern AS TIMESTAMP) - INTERVAL 1 day))] | | | 33:AGGREGATE [STREAMING] | | group by: a.customer_uuid, a.segment_name, a.segment_id, 'Closet Rate', '2016-05-10', to_date(CAST(to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) AS TIMESTAMP) - INTERVAL 1 day) | | | 32:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: a.segment_instance_key = max(i.segment_instance_key) | | | |--106:EXCHANGE [BROADCAST] | | | | | 105:AGGREGATE [FINALIZE] | | | output: max:merge(i.segment_instance_key) | | | group by: segment_id, to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) | | | having: to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')) <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 1 day) | | | | | 104:EXCHANGE [HASH(segment_id,to_date(from_utc_timestamp(segment_instance_created_at, 'EDT')))] | | | | | 29:AGGREGATE [STREAMING] | | | output: max(a.segment_instance_key) | | | group by: a.segment_id, to_date(from_utc_timestamp(a.segment_instance_created_at, 'EDT')) | | | | | 28:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: a.segment_instance_key = a.segment_instance_key | | | runtime filters: RF006 <- a.segment_instance_key | | | | | |--103:EXCHANGE [BROADCAST] | | | | | | | 26:SCAN HDFS [caastle_bi.dim_segment_instance a] | | | partitions=1/1 files=1 size=8.26MB | | | predicates: a.segment_instance_status = 'Refreshed' | | | | | 27:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: a.segment_key = a.segment_key | | | runtime filters: RF007 <- a.segment_key | | | | | |--102:EXCHANGE [BROADCAST] | | | | | | | 25:SCAN HDFS [caastle_bi.dim_segment a] | | | partitions=1/1 files=2 size=88.09MB | | | predicates: a.segment_id IN (490, 491, 492, 493) | | | | | 24:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | | hash predicates: b.tenant_key = c.tenant_key | | | other predicates: (c.tenant_id = 0 OR c.tenant_id IS NULL) | | | | | |--101:EXCHANGE [BROADCAST] | | | | | | | 22:SCAN HDFS [caastle_bi.dim_tenant c] | | | partitions=1/1 files=1 size=3.53KB | | | | | 23:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | | hash predicates: a.customer_key = b.customer_key | | | | | |--100:EXCHANGE [BROADCAST] | | | | | | | 21:SCAN HDFS [caastle_bi.dim_customer b] | | | partitions=7/7 files=55 size=2.74GB | | | | | 20:SCAN HDFS [caastle_bi.fact_customer_segments a] | | partitions=58726/58866 files=58726 size=18.96GB | | runtime filters: RF007 -> a.segment_key, RF006 -> a.segment_instance_key | | | 31:HASH JOIN [INNER JOIN, PARTITIONED] | | hash predicates: a.customer_key = a.customer_key | | | |--99:EXCHANGE [HASH(a.customer_key)] | | | | | 18:HASH JOIN [INNER JOIN, BROADCAST] | | | hash predicates: a.tenant_key = b.tenant_key | | | runtime filters: RF005 <- b.tenant_key | | | | | |--97:EXCHANGE [BROADCAST] | | | | | | | 17:SCAN HDFS [caastle_bi.dim_tenant b] | | | partitions=1/1 files=1 size=3.53KB | | | predicates: tenant_id = 0 | | | | | 16:SCAN HDFS [caastle_bi.dim_customer a] | | partitions=7/7 files=55 size=2.74GB | | runtime filters: RF005 -> a.tenant_key | | | 98:EXCHANGE [HASH(a.customer_key)] | | | 30:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: a.segment_key = a.segment_key | | runtime filters: RF004 <- a.segment_key | | | |--96:EXCHANGE [BROADCAST] | | | | | 19:SCAN HDFS [caastle_bi.dim_segment a] | | partitions=1/1 files=2 size=88.09MB | | predicates: a.segment_id IN (490, 491, 492, 493) | | | 15:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: b.tenant_key = c.tenant_key | | other predicates: (c.tenant_id = 0 OR c.tenant_id IS NULL) | | | |--95:EXCHANGE [BROADCAST] | | | | | 13:SCAN HDFS [caastle_bi.dim_tenant c] | | partitions=1/1 files=1 size=3.53KB | | | 14:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: a.customer_key = b.customer_key | | | |--94:EXCHANGE [BROADCAST] | | | | | 12:SCAN HDFS [caastle_bi.dim_customer b] | | partitions=7/7 files=55 size=2.74GB | | | 11:SCAN HDFS [caastle_bi.fact_customer_segments a] | partitions=58726/58866 files=58726 size=18.96GB | runtime filters: RF004 -> a.segment_key | 109:EXCHANGE [HASH(customer_uuid,dt)] | 10:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: customer_uuid = a.customer_uuid | other predicates: a.customer_type_key = 0 | |--93:EXCHANGE [BROADCAST] | | | 08:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: a.tenant_key = b.tenant_key | | runtime filters: RF001 <- b.tenant_key | | | |--92:EXCHANGE [BROADCAST] | | | | | 07:SCAN HDFS [caastle_bi.dim_tenant b] | | partitions=1/1 files=1 size=3.53KB | | predicates: tenant_id = 0 | | | 06:SCAN HDFS [caastle_bi.dim_customer a] | partitions=7/7 files=55 size=2.74GB | predicates: a.customer_type_key = 0, a.scd_end IS NULL | runtime filters: RF001 -> a.tenant_key | 09:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: customer_subscription_session = a.customer_subscription_session | other predicates: a.duration_in_hours IS NULL | |--91:EXCHANGE [BROADCAST] | | | 05:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | | hash predicates: b.tenant_key = c.tenant_key | | other predicates: (c.tenant_id = 0 OR c.tenant_id IS NULL) | | | |--90:EXCHANGE [BROADCAST] | | | | | 03:SCAN HDFS [caastle_bi.dim_tenant c] | | partitions=1/1 files=1 size=3.53KB | | | 04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED] | | hash predicates: b.customer_uuid = a.customer_uuid | | other predicates: b.scd_end IS NULL | | runtime filters: RF000 <- a.customer_uuid | | | |--89:EXCHANGE [HASH(a.customer_uuid)] | | | | | 01:SCAN HDFS [caastle_bi.customers_lt24 a] | | partitions=1/1 files=1 size=1.20MB | | | 88:EXCHANGE [HASH(b.customer_uuid)] | | | 02:SCAN HDFS [caastle_bi.dim_customer b] | partitions=7/7 files=55 size=2.74GB | runtime filters: RF000 -> b.customer_uuid | 00:SCAN HDFS [gwynniebee_bi_dgt.summary_customer] partitions=2855/7153 files=5765 size=4.29GB predicates: dt >= to_date(trunc(from_utc_timestamp(now(), 'EDT') - INTERVAL 6 month, 'D')), dt <= to_date(from_utc_timestamp(now(), 'EDT') - INTERVAL 2 day)