diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 5f37fc1..05d2b28 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -270,8 +270,14 @@ private RelNode decorrelate(RelNode root) { return new Function2() { public Void apply(RelNode oldNode, RelNode newNode) { if (cm.mapRefRelToCorRef.containsKey(oldNode)) { - cm.mapRefRelToCorRef.putAll(newNode, - cm.mapRefRelToCorRef.get(oldNode)); + final CorelMap corelMap = new CorelMapBuilder().build(newNode); + // since after various rules original relnode could have different + // corref (or might not have at all) we need to traverse the new node + // to figure out new cor refs and put that into map + if(!corelMap.mapRefRelToCorRef.isEmpty()){ + cm.mapRefRelToCorRef.putAll(newNode, + corelMap.mapRefRelToCorRef.get(newNode)); + } } if (oldNode instanceof LogicalCorrelate && newNode instanceof LogicalCorrelate) { diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 78752a0..1cbbe8f 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -3609,12 +3609,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3634,93 +3630,46 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 10 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((p_size = p_size) and p_type is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 2977 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_name (type: string), p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2977 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 13 Data size: 2977 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 8 + Map 5 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((p_size = p_size) and p_type is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3746,71 +3695,24 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 4050 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 18 Data size: 4050 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col4 + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col6 (type: int) + expressions: _col0 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) + Statistics: Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3869,11 +3771,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3899,96 +3798,40 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_type = p_type) (type: boolean) + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan - alias: part - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string), p_size (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan alias: pp Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((p_size = p_size) and p_type is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 9 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_size (type: int), p_type (type: string) - outputColumnNames: p_size, p_type - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int), p_type (type: string) - mode: hash - outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -4013,61 +3856,25 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col6, _col7 - Statistics: Num rows: 7 Data size: 1603 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col6 (type: string), _col7 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 1603 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2748 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1374 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 18 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 6 Data size: 1374 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/query14.q.out b/ql/src/test/results/clientpositive/perf/query14.q.out index 332d43a..55a2e5b 100644 --- a/ql/src/test/results/clientpositive/perf/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/query14.q.out @@ -1,9 +1,9 @@ +Warning: Shuffle Join MERGEJOIN[914][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 61' is a cross product +Warning: Shuffle Join MERGEJOIN[915][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 62' is a cross product Warning: Shuffle Join MERGEJOIN[912][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product Warning: Shuffle Join MERGEJOIN[913][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product Warning: Shuffle Join MERGEJOIN[916][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 114' is a cross product Warning: Shuffle Join MERGEJOIN[917][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 115' is a cross product -Warning: Shuffle Join MERGEJOIN[914][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 61' is a cross product -Warning: Shuffle Join MERGEJOIN[915][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 62' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk diff --git a/ql/src/test/results/clientpositive/perf/query69.q.out b/ql/src/test/results/clientpositive/perf/query69.q.out index 38c7fa7..90224ce 100644 --- a/ql/src/test/results/clientpositive/perf/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/query69.q.out @@ -1,9 +1,3 @@ -Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 12' is a cross product -Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 26' is a cross product -Warning: Shuffle Join MERGEJOIN[186][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 22' is a cross product -Warning: Shuffle Join MERGEJOIN[179][tables = [$hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 36' is a cross product -Warning: Shuffle Join MERGEJOIN[188][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 32' is a cross product PREHOOK: query: explain select cd_gender, cd_marital_status, @@ -97,25 +91,16 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 18 (CUSTOM_SIMPLE_EDGE), Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE), Reducer 27 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE), Map 28 (CUSTOM_SIMPLE_EDGE), Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 37 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Reducer 32 (SIMPLE_EDGE) -Reducer 36 <- Map 35 (CUSTOM_SIMPLE_EDGE), Map 38 (CUSTOM_SIMPLE_EDGE), Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Reducer 36 (SIMPLE_EDGE) -Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 33 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) @@ -124,259 +109,168 @@ Stage-0 limit:100 Stage-1 Reducer 7 - File Output Operator [FS_130] - Limit [LIM_129] (rows=100 width=1) + File Output Operator [FS_76] + Limit [LIM_75] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_128] (rows=1268213682555322368 width=1) + Select Operator [SEL_74] (rows=52707204 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_127] - Select Operator [SEL_126] (rows=1268213682555322368 width=1) + SHUFFLE [RS_73] + Select Operator [SEL_72] (rows=52707204 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator [GBY_125] (rows=1268213682555322368 width=1) + Group By Operator [GBY_71] (rows=52707204 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_124] + SHUFFLE [RS_70] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_123] (rows=2536427365110644736 width=1) + Group By Operator [GBY_69] (rows=105414409 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Select Operator [SEL_122] (rows=2536427365110644736 width=1) + Select Operator [SEL_68] (rows=105414409 width=88) Output:["_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_121] (rows=2536427365110644736 width=1) + Filter Operator [FIL_67] (rows=105414409 width=88) predicate:_col15 is null - Merge Join Operator [MERGEJOIN_189] (rows=5072854730221289472 width=1) - Conds:RS_118._col0=RS_119._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col15"] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_119] + Merge Join Operator [MERGEJOIN_117] (rows=210828818 width=88) + Conds:RS_64._col0=RS_65._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col15"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_117] (rows=4611686018427387903 width=1) + Select Operator [SEL_63] (rows=79197206 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_116] (rows=4611686018427387903 width=1) + Group By Operator [GBY_62] (rows=79197206 width=135) Output:["_col0"],keys:KEY._col0 - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_115] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_61] PartitionCols:_col0 - Group By Operator [GBY_114] (rows=9223372036854775807 width=1) - Output:["_col0"],keys:_col5 - Merge Join Operator [MERGEJOIN_188] (rows=9223372036854775807 width=1) - Conds:(Inner),Output:["_col5"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_110] - Merge Join Operator [MERGEJOIN_184] (rows=158394413 width=135) - Conds:RS_107._col0=RS_108._col0(Inner) - <-Map 30 [SIMPLE_EDGE] - SHUFFLE [RS_107] - PartitionCols:_col0 - Select Operator [SEL_88] (rows=143994918 width=135) - Output:["_col0"] - Filter Operator [FIL_174] (rows=143994918 width=135) - predicate:((cs_ship_customer_sk = cs_ship_customer_sk) and cs_sold_date_sk is not null) - TableScan [TS_86] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Map 34 [SIMPLE_EDGE] - SHUFFLE [RS_108] - PartitionCols:_col0 - Select Operator [SEL_91] (rows=4058 width=1119) - Output:["_col0"] - Filter Operator [FIL_175] (rows=4058 width=1119) - predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) - TableScan [TS_89] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 37 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_105] (rows=4611686018427387903 width=1) - Output:["_col0"],keys:KEY._col0 - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col0 - Group By Operator [GBY_103] (rows=9223372036854775807 width=1) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_179] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),Output:["_col2"] - <-Map 35 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_98] - Select Operator [SEL_93] (rows=40000000 width=4) - TableScan [TS_92] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE - <-Map 38 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_99] - Select Operator [SEL_95] (rows=1861800 width=4) - TableScan [TS_94] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE - <-Map 39 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_100] - Select Operator [SEL_97] (rows=80000000 width=860) - Output:["_col0"] - TableScan [TS_96] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + Group By Operator [GBY_60] (rows=158394413 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_116] (rows=158394413 width=135) + Conds:RS_56._col0=RS_57._col0(Inner),Output:["_col1"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=143994918 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_109] (rows=143994918 width=135) + predicate:((cs_ship_customer_sk = cs_ship_customer_sk) and cs_sold_date_sk is not null) + TableScan [TS_50] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col0 + Select Operator [SEL_55] (rows=4058 width=1119) + Output:["_col0"] + Filter Operator [FIL_110] (rows=4058 width=1119) + predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) + TableScan [TS_53] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_118] + SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_85] (rows=4611686018427387903 width=1) + Select Operator [SEL_49] (rows=191662558 width=88) Output:["_col0","_col10","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_84] (rows=4611686018427387903 width=1) + Filter Operator [FIL_48] (rows=191662558 width=88) predicate:_col13 is null - Merge Join Operator [MERGEJOIN_187] (rows=9223372036854775807 width=1) - Conds:RS_79._col0=RS_80._col0(Inner),RS_79._col0=RS_81._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col13"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_80] - PartitionCols:_col0 - Group By Operator [GBY_39] (rows=4611686018427387903 width=1) - Output:["_col0"],keys:KEY._col0 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col0 - Group By Operator [GBY_37] (rows=9223372036854775807 width=1) - Output:["_col0"],keys:_col5 - Merge Join Operator [MERGEJOIN_185] (rows=9223372036854775807 width=1) - Conds:(Inner),Output:["_col5"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_33] - Merge Join Operator [MERGEJOIN_182] (rows=316797605 width=88) - Conds:RS_30._col0=RS_31._col0(Inner) - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=287997817 width=88) - Output:["_col0"] - Filter Operator [FIL_168] (rows=287997817 width=88) - predicate:((ss_customer_sk = ss_customer_sk) and ss_sold_date_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=4058 width=1119) - Output:["_col0"] - Filter Operator [FIL_169] (rows=4058 width=1119) - predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_34] - Group By Operator [GBY_28] (rows=4611686018427387903 width=1) - Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Group By Operator [GBY_26] (rows=9223372036854775807 width=1) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_177] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),Output:["_col2"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_21] - Select Operator [SEL_16] (rows=40000000 width=4) - TableScan [TS_15] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE - <-Map 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_22] - Select Operator [SEL_18] (rows=1861800 width=4) - TableScan [TS_17] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE - <-Map 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_23] - Select Operator [SEL_20] (rows=80000000 width=860) - Output:["_col0"] - TableScan [TS_19] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_81] - PartitionCols:_col0 - Select Operator [SEL_72] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"] - Group By Operator [GBY_71] (rows=4611686018427387903 width=1) - Output:["_col0"],keys:KEY._col0 - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_70] + Select Operator [SEL_47] (rows=383325116 width=88) + Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col13"] + Merge Join Operator [MERGEJOIN_115] (rows=383325116 width=88) + Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col0","_col7","_col9","_col10","_col11","_col12","_col13"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Select Operator [SEL_36] (rows=1861800 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_108] (rows=1861800 width=385) + predicate:cd_demo_sk is not null + TableScan [TS_34] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_114] (rows=348477371 width=88) + Conds:RS_40._col0=RS_41._col0(Inner),RS_40._col0=RS_42._col0(Left Outer),Output:["_col0","_col1","_col7"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col0 - Group By Operator [GBY_69] (rows=9223372036854775807 width=1) - Output:["_col0"],keys:_col5 - Merge Join Operator [MERGEJOIN_186] (rows=9223372036854775807 width=1) - Conds:(Inner),Output:["_col5"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_65] - Merge Join Operator [MERGEJOIN_183] (rows=79201469 width=135) - Conds:RS_62._col0=RS_63._col0(Inner) - <-Map 20 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col0 - Select Operator [SEL_43] (rows=72001334 width=135) - Output:["_col0"] - Filter Operator [FIL_171] (rows=72001334 width=135) - predicate:((ws_bill_customer_sk = ws_bill_customer_sk) and ws_sold_date_sk is not null) - TableScan [TS_41] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Map 24 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Group By Operator [GBY_18] (rows=158398802 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=316797605 width=88) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_112] (rows=316797605 width=88) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_46] (rows=4058 width=1119) + Select Operator [SEL_11] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_172] (rows=4058 width=1119) + Filter Operator [FIL_105] (rows=4058 width=1119) predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) - TableScan [TS_44] (rows=73049 width=1119) + TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_66] - Group By Operator [GBY_60] (rows=4611686018427387903 width=1) - Output:["_col0"],keys:KEY._col0 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_59] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_12] PartitionCols:_col0 - Group By Operator [GBY_58] (rows=9223372036854775807 width=1) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_178] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),Output:["_col2"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_53] - Select Operator [SEL_48] (rows=40000000 width=4) - TableScan [TS_47] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE - <-Map 28 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_54] - Select Operator [SEL_50] (rows=1861800 width=4) - TableScan [TS_49] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE - <-Map 29 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_55] - Select Operator [SEL_52] (rows=80000000 width=860) - Output:["_col0"] - TableScan [TS_51] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_79] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_181] (rows=96800003 width=860) - Conds:RS_76._col1=RS_77._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_77] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_167] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_180] (rows=88000001 width=860) - Conds:RS_73._col2=RS_74._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_165] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_74] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_166] (rows=20000000 width=1014) - predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + Select Operator [SEL_8] (rows=287997817 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_104] (rows=287997817 width=88) + predicate:((ss_customer_sk = ss_customer_sk) and ss_sold_date_sk is not null) + TableScan [TS_6] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_33] (rows=39600734 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_32] (rows=39600734 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Group By Operator [GBY_30] (rows=79201469 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_113] (rows=79201469 width=135) + Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_22] (rows=72001334 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_106] (rows=72001334 width=135) + predicate:((ws_bill_customer_sk = ws_bill_customer_sk) and ws_sold_date_sk is not null) + TableScan [TS_20] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_25] (rows=4058 width=1119) + Output:["_col0"] + Filter Operator [FIL_107] (rows=4058 width=1119) + predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) + TableScan [TS_23] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_111] (rows=88000001 width=860) + Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_102] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_103] (rows=20000000 width=1014) + predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 1ef17d9..7f53d27 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -3411,12 +3411,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 10 (GROUP, 2) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 11 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3434,73 +3430,42 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 10 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int) - outputColumnNames: p_size - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: ((p_size = p_size) and p_type is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string), p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 8 + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: ((p_size = p_size) and p_type is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 11 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -3510,10 +3475,10 @@ STAGE PLANS: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3524,68 +3489,24 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col4 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col6 (type: int) + expressions: _col0 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3643,11 +3564,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3671,91 +3589,36 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: (p_type = p_type) (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Map 5 Map Operator Tree: TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string), p_size (type: int) - outputColumnNames: p_type, p_size - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string), p_size (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: ((p_size = p_size) and p_type is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 9 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int), p_type (type: string) - outputColumnNames: p_size, p_type - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_size (type: int), p_type (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 10 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -3765,10 +3628,10 @@ STAGE PLANS: 0 _col1 (type: string), _col2 (type: string), _col3 (type: int) 1 _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 67 Data size: 8375 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 67 Data size: 8375 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3778,59 +3641,25 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col6, _col7 - Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col6 (type: string), _col7 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator