Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 File Output Operator [FS_17] Select Operator [SEL_16] (rows=88846043 width=2044) Output:["_col0","_col1","_col2"] Merge Join Operator [MERGEJOIN_20] (rows=88846043 width=2044) Conds:GBY_4._col0=GBY_11._col0(Left Outer),Output:["_col0","_col1","_col3"] <-Group By Operator [GBY_11] (rows=88846043 width=2036) Output:["_col0","_col1"],aggregations:["collect_set(VALUE._col0)"],keys:KEY._col0 <-Group By Operator [GBY_4] (rows=88846043 width=124) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_3] PartitionCols:_col0 Group By Operator [GBY_2] (rows=324549876 width=124) Output:["_col0","_col1"],aggregations:["count()"],keys:a Select Operator [SEL_1] (rows=324549876 width=116) Output:["a"] TableScan [TS_0] (rows=324549876 width=116) database@orc_table,ot,Tbl:COMPLETE,Col:COMPLETE,Output:["a"] <-Map 3 [SIMPLE_EDGE] SHUFFLE [RS_10] PartitionCols:_col0 Group By Operator [GBY_9] (rows=324549876 width=2036) Output:["_col0","_col1"],aggregations:["collect_set(b)"],keys:a Filter Operator [FIL_19] (rows=324549876 width=232) predicate:((b <> 'invalid') and true) TableScan [TS_6] (rows=324549876 width=232) database@orc_table,ot,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]