diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 76e0780..7c96f3d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -164,6 +164,25 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, boolean isCorrScalarAgg) { switch (e.getKind()) { case SCALAR_QUERY: + builder.push(e.rel); + // returns single row/column + builder.aggregate(builder.groupKey(), + builder.count(false, "cnt")); + + SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, + InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); + + // we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer + // ends up getting rid of Project since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + builder.call(countCheck, builder.field("cnt")), + builder.literal(1))); + if( !variablesSet.isEmpty()) + { + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + } + else + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); if(isCorrScalarAgg) { // Transformation : // Outer Query Left Join (inner query) on correlated predicate and preserve rows only from left side. @@ -193,26 +212,7 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, //Transformation is to left join for correlated predicates and inner join otherwise, // but do a count on inner side before that to make sure it generates atmost 1 row. - builder.push(e.rel); - // returns single row/column - builder.aggregate(builder.groupKey(), - builder.count(false, "cnt")); - - SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, - InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); - // we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer - // ends up getting rid of Project since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - builder.call(countCheck, builder.field("cnt")), - builder.literal(1))); - - if( !variablesSet.isEmpty()) - { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } - else - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); builder.push(e.rel); builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); offset++; diff --git a/ql/src/test/queries/clientnegative/subquery_scalar_corr_multi_rows.q b/ql/src/test/queries/clientnegative/subquery_scalar_corr_multi_rows.q new file mode 100644 index 0000000..e9ea703 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_scalar_corr_multi_rows.q @@ -0,0 +1,2 @@ +-- inner query produces more than one row +select * from part where p_size > (select count(*) from part p where p.p_mfgr = part.p_mfgr group by p_type); \ No newline at end of file diff --git a/ql/src/test/results/clientnegative/subquery_scalar_corr_multi_rows.q.out b/ql/src/test/results/clientnegative/subquery_scalar_corr_multi_rows.q.out new file mode 100644 index 0000000..3235048 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_scalar_corr_multi_rows.q.out @@ -0,0 +1,5 @@ +PREHOOK: query: select * from part where p_size > (select count(*) from part p where p.p_mfgr = part.p_mfgr group by p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index da387d7..f6dc397 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1681,9 +1681,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1707,6 +1708,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_name = p_name) (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (p_name = p_name) (type: boolean) @@ -1735,16 +1756,18 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col0 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col1 + 100) < CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END) (type: boolean) - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((_col1 + 100) < CASE WHEN (_col5 is null) THEN (null) ELSE (_col4) END) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1773,6 +1796,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 2 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: max(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -1817,8 +1866,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1842,6 +1892,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_type = p_type) (type: boolean) + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: part_null Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_type = p_type) (type: boolean) @@ -1866,21 +1936,23 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col4 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 + Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(_col5) > CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(_col5) > CASE WHEN (_col12 is null) THEN (null) ELSE (_col11) END) (type: boolean) + Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1889,6 +1961,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: avg(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -1922,8 +2020,8 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null #### A masked pattern was here #### 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[45][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size BETWEEN (select min(p_size) from part_null where part_null.p_type = part.p_type) AND (select max(p_size) from part_null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size BETWEEN (select min(p_size) from part_null where part_null.p_type = part.p_type) AND (select max(p_size) from part_null) @@ -1938,11 +2036,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1962,10 +2061,50 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs + Map 11 + Map Operator Tree: + TableScan + alias: part_null + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(p_size) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: part_null + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_type = p_type) (type: boolean) + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: part_null Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_type = p_type) (type: boolean) @@ -1984,7 +2123,7 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 9 Map Operator Tree: TableScan alias: part_null @@ -2004,30 +2143,33 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 9 - Map Operator Tree: - TableScan - alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int) - outputColumnNames: p_size - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_size) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -2042,15 +2184,17 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col4 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 + Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: boolean) + Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), _col12 (type: boolean) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2060,12 +2204,12 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 28 Data size: 17955 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 + Statistics: Num rows: 57 Data size: 35919 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 28 Data size: 17955 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: boolean) + Statistics: Num rows: 57 Data size: 35919 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), _col12 (type: boolean) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2075,18 +2219,18 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col13 - Statistics: Num rows: 28 Data size: 18095 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col15 + Statistics: Num rows: 57 Data size: 36204 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col5 BETWEEN CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END AND _col13 (type: boolean) - Statistics: Num rows: 3 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + predicate: _col5 BETWEEN CASE WHEN (_col12 is null) THEN (null) ELSE (_col11) END AND _col15 (type: boolean) + Statistics: Num rows: 6 Data size: 3810 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 3810 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 3810 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2095,6 +2239,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -2110,29 +2280,6 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: boolean) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2140,8 +2287,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[45][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_size BETWEEN (select min(p_size) from part_null where part_null.p_type = part.p_type) AND (select max(p_size) from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2178,7 +2325,7 @@ POSTHOOK: Input: default@part_null 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size >= (select min(p_size) from part_null where part_null.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size >= (select min(p_size) from part_null where part_null.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null) @@ -2192,10 +2339,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2216,89 +2364,123 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: part_null + Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_retailprice (type: double) + outputColumnNames: p_retailprice + Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(p_retailprice) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Execution mode: llap + LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(p_size) keys: p_type (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: p_partkey - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_type = p_type) (type: boolean) + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(p_partkey) + aggregations: min(p_size) + keys: p_type (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_retailprice (type: double) - outputColumnNames: p_retailprice - Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + expressions: p_partkey (type: int) + outputColumnNames: p_partkey + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(p_retailprice) + aggregations: count(p_partkey) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col4 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 + Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col5 >= CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE + predicate: (_col5 >= CASE WHEN (_col12 is null) THEN (null) ELSE (_col11) END) (type: boolean) + Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -2311,18 +2493,18 @@ STAGE PLANS: 0 1 2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13 - Statistics: Num rows: 9 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col15 + Statistics: Num rows: 19 Data size: 12125 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col7 <= _col13) (type: boolean) - Statistics: Num rows: 3 Data size: 1947 Basic stats: COMPLETE Column stats: NONE + predicate: (_col7 <= _col15) (type: boolean) + Statistics: Num rows: 6 Data size: 3828 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 1947 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 3828 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1947 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 3828 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2331,6 +2513,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -2346,7 +2554,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 7 Data size: 735 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: boolean) - Reducer 7 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2369,18 +2577,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -2388,7 +2584,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size >= (select min(p_size) from part_null where part_null.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2668,8 +2864,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2693,6 +2890,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_size = p_size) and (p_partkey = p_partkey)) (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_partkey (type: int), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((p_size = p_size) and (p_partkey = p_partkey)) (type: boolean) @@ -2721,21 +2938,23 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col0 (type: int), _col5 (type: int) - 1 _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 16406 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col0 (type: int), _col1 (type: int) + 2 _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 + Statistics: Num rows: 1 Data size: 631 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToLong(_col5) <> CASE WHEN (_col10 is null) THEN (0) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 26 Data size: 16406 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToLong(_col5) <> CASE WHEN (_col13 is null) THEN (0) ELSE (_col12) END) (type: boolean) + Statistics: Num rows: 1 Data size: 631 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2744,6 +2963,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int), _col1 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col2) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -3772,9 +4017,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3794,24 +4041,43 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_size = p_size) and p_type is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: p - Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_name (type: string), p_type (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_type (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -3834,27 +4100,49 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 26 Data size: 8138 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col0 (type: int) + 2 _col2 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 313 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean) - Statistics: Num rows: 13 Data size: 4069 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (_col1 like CASE WHEN (_col6 is null) THEN (null) ELSE (_col5) END) (type: boolean) + Statistics: Num rows: 1 Data size: 313 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3868,6 +4156,53 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col3 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col3 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -3886,7 +4221,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 5 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4247,7 +4582,7 @@ POSTHOOK: Input: default@part_null 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra 15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type) @@ -4262,9 +4597,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -4284,6 +4620,32 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs + Map 11 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_type = p_type) (type: boolean) + Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_name (type: string) + outputColumnNames: p_type, p_name + Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_name) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -4325,32 +4687,52 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pp - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_type = p_type) (type: boolean) - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string), p_name (type: string) - outputColumnNames: p_type, p_name - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(p_name) - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -4413,21 +4795,23 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col4 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14 - Statistics: Num rows: 7 Data size: 982 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col15, _col16 + Statistics: Num rows: 15 Data size: 1964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not (_col1 like CASE WHEN (_col14 is null) THEN (null) ELSE (_col13) END)) (type: boolean) - Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE + predicate: (not (_col1 like CASE WHEN (_col16 is null) THEN (null) ELSE (_col15) END)) (type: boolean) + Statistics: Num rows: 8 Data size: 1047 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1047 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1047 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4469,7 +4853,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4502,8 +4886,9 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4550,6 +4935,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: lineitem + Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_linenumber (type: int) + outputColumnNames: l_linenumber + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: l_linenumber (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: lineitem Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) @@ -4608,13 +5017,15 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col4 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5, _col6 + 1 _col0 (type: int) + 2 _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col7, _col8 Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col1 <> CASE WHEN (_col6 is null) THEN (null) ELSE (_col5) END) (type: boolean) + predicate: (_col1 <> CASE WHEN (_col8 is null) THEN (null) ELSE (_col7) END) (type: boolean) Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) @@ -4631,6 +5042,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial @@ -4713,8 +5150,9 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4761,6 +5199,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: lineitem + Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_linenumber (type: int) + outputColumnNames: l_linenumber + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: l_linenumber (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: lineitem Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) @@ -4819,13 +5281,15 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col4 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5, _col6 + 1 _col0 (type: int) + 2 _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col7, _col8 Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col1 <> CASE WHEN (_col6 is null) THEN (null) ELSE (_col5) END) (type: boolean) + predicate: (_col1 <> CASE WHEN (_col8 is null) THEN (null) ELSE (_col7) END) (type: boolean) Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) @@ -4842,6 +5306,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial @@ -5124,8 +5614,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5169,6 +5660,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pp + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_type = p_type) (type: boolean) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (p_type = p_type) (type: boolean) @@ -5215,32 +5726,60 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col2 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 13 Data size: 6565 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col5, _col6 + Statistics: Num rows: 1 Data size: 505 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (not (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)) (type: boolean) - Statistics: Num rows: 7 Data size: 3535 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (not (_col1 like CASE WHEN (_col6 is null) THEN (null) ELSE (_col5) END)) (type: boolean) + Statistics: Num rows: 1 Data size: 505 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -5431,8 +5970,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5460,6 +6000,26 @@ STAGE PLANS: Filter Operator predicate: (name = name) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (name = name) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: name (type: string), deptno (type: int) outputColumnNames: name, deptno @@ -5484,21 +6044,23 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col1 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12, _col13 + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col2) <> CASE WHEN (_col11 is null) THEN (0) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToLong(_col2) <> CASE WHEN (_col13 is null) THEN (0) ELSE (_col12) END) (type: boolean) + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5507,6 +6069,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -5557,8 +6145,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5587,6 +6176,26 @@ STAGE PLANS: predicate: (deptno = deptno) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator + keys: deptno (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (deptno = deptno) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator aggregations: min(name) keys: deptno (type: int) mode: hash @@ -5606,21 +6215,23 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + 2 _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12, _col13 + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > CASE WHEN (_col11 is null) THEN (null) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > CASE WHEN (_col13 is null) THEN (null) ELSE (_col12) END) (type: boolean) + Statistics: Num rows: 3 Data size: 142 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 142 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 142 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5629,6 +6240,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial @@ -5675,10 +6312,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5698,6 +6337,28 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Execution mode: llap LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (deptno = deptno) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(name) + keys: deptno (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs Map 4 Map Operator Tree: TableScan @@ -5706,6 +6367,26 @@ STAGE PLANS: Filter Operator predicate: (name = name) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (name = name) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: name (type: string), deptno (type: int) outputColumnNames: name, deptno @@ -5724,7 +6405,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: depts @@ -5733,42 +6414,61 @@ STAGE PLANS: predicate: (deptno = deptno) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(name) keys: deptno (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col1 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12, _col13 + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col2) <> CASE WHEN (_col11 is null) THEN (0) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToLong(_col2) <> CASE WHEN (_col13 is null) THEN (0) ELSE (_col12) END) (type: boolean) + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Reducer 3 Execution mode: llap @@ -5776,21 +6476,23 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col2 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14 - Statistics: Num rows: 5 Data size: 287 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + 2 _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col17, _col18 + Statistics: Num rows: 24 Data size: 1150 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col0) > CASE WHEN (_col14 is null) THEN (0) ELSE (_col13) END) (type: boolean) - Statistics: Num rows: 1 Data size: 57 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToLong(_col0) > CASE WHEN (_col18 is null) THEN (0) ELSE (_col17) END) (type: boolean) + Statistics: Num rows: 8 Data size: 383 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 383 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 383 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5799,6 +6501,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -5814,25 +6542,32 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: boolean) - Reducer 7 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: boolean) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -5855,7 +6590,7 @@ POSTHOOK: Input: default@emps 120 Wilma 20 F NULL 1 5 NULL true 2005-09-07 130 Alice 40 F Vancouver 2 NULL false true 2007-01-01 110 John 40 M Vancouver 2 NULL false true 2002-05-03 -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from emps where deptno <> (select sum(deptno) from depts where depts.name = emps.name) and empno > (select count(name) from depts) PREHOOK: type: QUERY POSTHOOK: query: explain select * from emps where deptno <> (select sum(deptno) from depts where depts.name = emps.name) and empno > (select count(name) from depts) @@ -5869,10 +6604,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -5893,6 +6629,26 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Execution mode: llap LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string) + outputColumnNames: name + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(name) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: no inputs Map 4 Map Operator Tree: TableScan @@ -5901,6 +6657,26 @@ STAGE PLANS: Filter Operator predicate: (name = name) (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (name = name) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: name (type: string), deptno (type: int) outputColumnNames: name, deptno @@ -5919,7 +6695,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: depts @@ -5939,47 +6715,41 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 8 - Map Operator Tree: - TableScan - alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string) - outputColumnNames: name - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(name) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reducer 11 Execution mode: llap - LLAP IO: no inputs + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col1 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12, _col13 + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col2) <> CASE WHEN (_col11 is null) THEN (null) ELSE (_col10) END) (type: boolean) - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToLong(_col2) <> CASE WHEN (_col13 is null) THEN (null) ELSE (_col12) END) (type: boolean) + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 523 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Reducer 3 Execution mode: llap @@ -5992,18 +6762,18 @@ STAGE PLANS: 0 1 2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col14 - Statistics: Num rows: 5 Data size: 346 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col16 + Statistics: Num rows: 11 Data size: 710 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col0) > _col14) (type: boolean) - Statistics: Num rows: 1 Data size: 69 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToLong(_col0) > _col16) (type: boolean) + Statistics: Num rows: 3 Data size: 193 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 193 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 193 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6012,6 +6782,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -6027,7 +6823,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: boolean) - Reducer 7 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -6050,18 +6846,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator @@ -6069,7 +6853,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from emps where deptno <> (select count(deptno) from depts where depts.name = emps.name) and empno > (select count(name) from depts) PREHOOK: type: QUERY PREHOOK: Input: default@depts diff --git a/ql/src/test/results/clientpositive/perf/query1.q.out b/ql/src/test/results/clientpositive/perf/query1.q.out index 09278e3..53acdcd 100644 --- a/ql/src/test/results/clientpositive/perf/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/query1.q.out @@ -47,131 +47,181 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 - File Output Operator [FS_54] - Limit [LIM_53] (rows=100 width=860) + Reducer 8 + File Output Operator [FS_82] + Limit [LIM_81] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_52] (rows=32266667 width=860) + Select Operator [SEL_80] (rows=35493335 width=860) Output:["_col0"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_51] - Select Operator [SEL_50] (rows=32266667 width=860) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_79] + Select Operator [SEL_78] (rows=35493335 width=860) Output:["_col0"] - Filter Operator [FIL_49] (rows=32266667 width=860) - predicate:(_col2 > CASE WHEN (_col8 is null) THEN (null) ELSE (_col7) END) - Merge Join Operator [MERGEJOIN_78] (rows=96800003 width=860) - Conds:RS_45._col1=RS_46._col2(Left Outer),Output:["_col2","_col6","_col7","_col8"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_46] + Filter Operator [FIL_77] (rows=35493335 width=860) + predicate:(_col2 > CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END) + Merge Join Operator [MERGEJOIN_114] (rows=106480005 width=860) + Conds:RS_74._col1=RS_75._col2(Left Outer),Output:["_col2","_col6","_col9","_col10"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_75] PartitionCols:_col2 - Select Operator [SEL_38] (rows=7918783 width=77) + Select Operator [SEL_73] (rows=7918783 width=77) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_37] (rows=7918783 width=77) + Group By Operator [GBY_72] (rows=7918783 width=77) Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1 - Select Operator [SEL_33] (rows=15837566 width=77) + Select Operator [SEL_68] (rows=15837566 width=77) Output:["_col1","_col2"] - Group By Operator [GBY_32] (rows=15837566 width=77) + Group By Operator [GBY_67] (rows=15837566 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_31] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_66] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=31675133 width=77) + Group By Operator [GBY_65] (rows=31675133 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Select Operator [SEL_29] (rows=31675133 width=77) + Select Operator [SEL_64] (rows=31675133 width=77) Output:["_col2","_col1","_col3"] - Merge Join Operator [MERGEJOIN_77] (rows=31675133 width=77) - Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_26] + Merge Join Operator [MERGEJOIN_112] (rows=31675133 width=77) + Conds:RS_61._col0=RS_62._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_22] (rows=28795575 width=77) + Select Operator [SEL_57] (rows=28795575 width=77) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_72] (rows=28795575 width=77) + Filter Operator [FIL_106] (rows=28795575 width=77) predicate:((sr_store_sk = sr_store_sk) and sr_returned_date_sk is not null) - TableScan [TS_20] (rows=57591150 width=77) + TableScan [TS_55] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_27] + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_62] PartitionCols:_col0 - Select Operator [SEL_25] (rows=36524 width=1119) + Select Operator [SEL_60] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_73] (rows=36524 width=1119) + Filter Operator [FIL_107] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_23] (rows=73049 width=1119) + TableScan [TS_58] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_45] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_76] (rows=88000001 width=860) - Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1","_col2","_col6"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_43] + Merge Join Operator [MERGEJOIN_113] (rows=96800003 width=860) + Conds:RS_51._col1=RS_52._col0(Left Outer),Output:["_col1","_col2","_col6"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_52] PartitionCols:_col0 - Select Operator [SEL_19] (rows=80000000 width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_71] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_17] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_75] (rows=34842647 width=77) - Conds:RS_39._col1=RS_40._col0(Inner),Output:["_col0","_col1","_col2"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_40] + Select Operator [SEL_44] (rows=1319797 width=77) + Output:["_col0"] + Filter Operator [FIL_43] (rows=1319797 width=77) + predicate:(sq_count_check(_col1) <= 1) + Group By Operator [GBY_42] (rows=3959391 width=77) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_37] (rows=7918783 width=77) + Output:["_col0"],keys:_col1 + Select Operator [SEL_33] (rows=15837566 width=77) + Output:["_col1"] + Group By Operator [GBY_32] (rows=15837566 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Group By Operator [GBY_30] (rows=31675133 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Select Operator [SEL_29] (rows=31675133 width=77) + Output:["_col2","_col1","_col3"] + Merge Join Operator [MERGEJOIN_111] (rows=31675133 width=77) + Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_22] (rows=28795575 width=77) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_104] (rows=28795575 width=77) + predicate:((sr_store_sk = sr_store_sk) and sr_returned_date_sk is not null) + TableScan [TS_20] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_25] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_105] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_23] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_110] (rows=88000001 width=860) + Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_103] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_17] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_48] PartitionCols:_col0 - Select Operator [SEL_16] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_70] (rows=852 width=1910) - predicate:((s_state = 'NM') and s_store_sk is not null) - TableScan [TS_14] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col1 - Select Operator [SEL_13] (rows=31675133 width=77) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_12] (rows=31675133 width=77) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=63350266 width=77) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Select Operator [SEL_9] (rows=63350266 width=77) - Output:["_col2","_col1","_col3"] - Merge Join Operator [MERGEJOIN_74] (rows=63350266 width=77) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_68] (rows=57591150 width=77) - predicate:(sr_returned_date_sk is not null and sr_store_sk is not null and sr_customer_sk is not null) - TableScan [TS_0] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_7] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_69] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + Merge Join Operator [MERGEJOIN_109] (rows=34842647 width=77) + Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_102] (rows=852 width=1910) + predicate:((s_state = 'NM') and s_store_sk is not null) + TableScan [TS_14] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col1 + Select Operator [SEL_13] (rows=31675133 width=77) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_12] (rows=31675133 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Select Operator [SEL_9] (rows=63350266 width=77) + Output:["_col2","_col1","_col3"] + Merge Join Operator [MERGEJOIN_108] (rows=63350266 width=77) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_100] (rows=57591150 width=77) + predicate:(sr_returned_date_sk is not null and sr_store_sk is not null and sr_customer_sk is not null) + TableScan [TS_0] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_101] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] diff --git a/ql/src/test/results/clientpositive/perf/query30.q.out b/ql/src/test/results/clientpositive/perf/query30.q.out index 3bb4f81..d59098b 100644 --- a/ql/src/test/results/clientpositive/perf/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/query30.q.out @@ -59,163 +59,228 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 16 <- Map 19 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 22 <- Map 25 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 - File Output Operator [FS_67] - Limit [LIM_66] (rows=100 width=860) + Reducer 5 + File Output Operator [FS_101] + Limit [LIM_100] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_65] (rows=32266667 width=860) + Select Operator [SEL_99] (rows=35493335 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_64] - Select Operator [SEL_63] (rows=32266667 width=860) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_98] + Select Operator [SEL_97] (rows=35493335 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_62] (rows=32266667 width=860) - predicate:(_col2 > CASE WHEN (_col20 is null) THEN (null) ELSE (_col19) END) - Select Operator [SEL_61] (rows=96800003 width=860) - Output:["_col2","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - Merge Join Operator [MERGEJOIN_105] (rows=96800003 width=860) - Conds:RS_58._col0=RS_59._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col19","_col20"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_104] (rows=24200000 width=1014) - Conds:RS_51._col1=RS_52._col2(Left Outer),Output:["_col0","_col2","_col3","_col4"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col2 - Select Operator [SEL_50] (rows=5500000 width=1014) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_49] (rows=5500000 width=1014) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 - Select Operator [SEL_45] (rows=11000000 width=1014) - Output:["_col0","_col2"] - Group By Operator [GBY_44] (rows=11000000 width=1014) + Filter Operator [FIL_96] (rows=35493335 width=860) + predicate:(_col2 > CASE WHEN (_col22 is null) THEN (null) ELSE (_col21) END) + Merge Join Operator [MERGEJOIN_153] (rows=106480005 width=860) + Conds:RS_93._col1=RS_94._col2(Left Outer),Output:["_col2","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col21","_col22"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col2 + Select Operator [SEL_92] (rows=5500000 width=1014) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_91] (rows=5500000 width=1014) + Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 + Select Operator [SEL_87] (rows=11000000 width=1014) + Output:["_col0","_col2"] + Group By Operator [GBY_86] (rows=11000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col0 + Group By Operator [GBY_84] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_83] (rows=22000000 width=1014) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_150] (rows=22000000 width=1014) + Conds:RS_80._col2=RS_81._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 25 [SIMPLE_EDGE] + SHUFFLE [RS_81] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_143] (rows=20000000 width=1014) + predicate:((ca_state = ca_state) and ca_address_sk is not null) + TableScan [TS_74] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_149] (rows=15838314 width=92) + Conds:RS_77._col0=RS_78._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_141] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) + TableScan [TS_68] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + <-Map 24 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col0 + Select Operator [SEL_73] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_142] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_71] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col1 + Select Operator [SEL_67] (rows=96800003 width=860) + Output:["_col1","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col2","_col6","_col8","_col9"] + Merge Join Operator [MERGEJOIN_152] (rows=96800003 width=860) + Conds:RS_64._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17","_col18"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_151] (rows=24200000 width=1014) + Conds:RS_57._col1=RS_58._col0(Left Outer),Output:["_col0","_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Select Operator [SEL_25] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_24] (rows=22000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Group By Operator [GBY_42] (rows=22000000 width=1014) + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=44000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Select Operator [SEL_41] (rows=22000000 width=1014) + Select Operator [SEL_21] (rows=44000000 width=1014) Output:["_col7","_col1","_col3"] - Merge Join Operator [MERGEJOIN_103] (rows=22000000 width=1014) - Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_39] + Merge Join Operator [MERGEJOIN_146] (rows=44000000 width=1014) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_34] (rows=20000000 width=1014) + Select Operator [SEL_14] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_98] (rows=20000000 width=1014) - predicate:((ca_state = ca_state) and ca_address_sk is not null) - TableScan [TS_32] (rows=40000000 width=1014) + Filter Operator [FIL_137] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_12] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_38] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_102] (rows=15838314 width=92) - Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Select Operator [SEL_28] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_96] (rows=14398467 width=92) - predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) - TableScan [TS_26] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_36] + Merge Join Operator [MERGEJOIN_145] (rows=15838314 width=92) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_31] (rows=36524 width=1119) + Select Operator [SEL_11] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_97] (rows=36524 width=1119) + Filter Operator [FIL_136] (rows=36524 width=1119) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_29] (rows=73049 width=1119) + TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Select Operator [SEL_25] (rows=22000000 width=1014) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_24] (rows=22000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=44000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Select Operator [SEL_21] (rows=44000000 width=1014) - Output:["_col7","_col1","_col3"] - Merge Join Operator [MERGEJOIN_101] (rows=44000000 width=1014) - Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_95] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_100] (rows=15838314 width=92) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_94] (rows=36524 width=1119) - predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_15] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_135] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) + TableScan [TS_6] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=916666 width=1014) + Output:["_col0"] + Filter Operator [FIL_55] (rows=916666 width=1014) + predicate:(sq_count_check(_col1) <= 1) + Group By Operator [GBY_54] (rows=2750000 width=1014) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_49] (rows=5500000 width=1014) + Output:["_col0"],keys:_col0 + Select Operator [SEL_45] (rows=11000000 width=1014) + Output:["_col0"] + Group By Operator [GBY_44] (rows=11000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col0 - Select Operator [SEL_8] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_93] (rows=14398467 width=92) - predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) - TableScan [TS_6] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_99] (rows=88000001 width=860) - Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_91] (rows=80000000 width=860) - predicate:(c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] - <-Map 5 [SIMPLE_EDGE] - SHUFFLE [RS_56] + Group By Operator [GBY_42] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_41] (rows=22000000 width=1014) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_148] (rows=22000000 width=1014) + Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_140] (rows=20000000 width=1014) + predicate:((ca_state = ca_state) and ca_address_sk is not null) + TableScan [TS_32] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_147] (rows=15838314 width=92) + Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_138] (rows=14398467 width=92) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) + TableScan [TS_26] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_139] (rows=36524 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null) + TableScan [TS_29] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_5] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_92] (rows=20000000 width=1014) - predicate:((ca_state = 'IL') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + Merge Join Operator [MERGEJOIN_144] (rows=88000001 width=860) + Conds:RS_61._col2=RS_62._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_133] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_134] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/perf/query6.q.out b/ql/src/test/results/clientpositive/perf/query6.q.out index ca17206..06cd19f 100644 --- a/ql/src/test/results/clientpositive/perf/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/query6.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[111][tables = [$hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[129][tables = [$hdt$_5, $hdt$_6]] in Stage 'Reducer 17' is a cross product PREHOOK: query: explain select a.ca_state state, count(*) cnt from customer_address a ,customer c @@ -50,182 +50,209 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 20 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 19 <- Map 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 21 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 24 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 - File Output Operator [FS_77] - Limit [LIM_76] (rows=100 width=88) + Reducer 8 + File Output Operator [FS_93] + Limit [LIM_92] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_75] (rows=42591679 width=88) + Select Operator [SEL_91] (rows=46850848 width=88) Output:["_col0","_col1"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_74] - Filter Operator [FIL_72] (rows=42591679 width=88) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_90] + Filter Operator [FIL_88] (rows=46850848 width=88) predicate:(_col1 >= 10) - Group By Operator [GBY_71] (rows=127775039 width=88) + Group By Operator [GBY_87] (rows=140552546 width=88) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_70] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_86] PartitionCols:_col0 - Group By Operator [GBY_69] (rows=255550079 width=88) + Group By Operator [GBY_85] (rows=281105093 width=88) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 - Select Operator [SEL_68] (rows=255550079 width=88) + Select Operator [SEL_84] (rows=281105093 width=88) Output:["_col1"] - Filter Operator [FIL_67] (rows=255550079 width=88) - predicate:(_col10 > (1.2 * CASE WHEN (_col15 is null) THEN (null) ELSE (_col14) END)) - Select Operator [SEL_66] (rows=766650239 width=88) - Output:["_col1","_col10","_col14","_col15"] - Merge Join Operator [MERGEJOIN_114] (rows=766650239 width=88) - Conds:RS_63._col6=RS_64._col0(Inner),Output:["_col1","_col3","_col4","_col12"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_64] - PartitionCols:_col0 - Select Operator [SEL_53] (rows=80353 width=1119) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_112] (rows=80353 width=1119) - Conds:RS_50._col0=RS_51._col1(Inner),Output:["_col2"] - <-Map 21 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Select Operator [SEL_46] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_107] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_month_seq is not null) - TableScan [TS_44] (rows=73049 width=1119) - default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_111] (rows=9131 width=1128) - Conds:(Inner),Output:["_col0"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_47] - Group By Operator [GBY_28] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Group By Operator [GBY_26] (rows=18262 width=1119) - Output:["_col0"],keys:d_month_seq - Select Operator [SEL_25] (rows=18262 width=1119) - Output:["d_month_seq"] - Filter Operator [FIL_105] (rows=18262 width=1119) - predicate:((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) - TableScan [TS_23] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_48] - Select Operator [SEL_43] (rows=1 width=8) - Filter Operator [FIL_42] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_40] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - Group By Operator [GBY_38] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_36] (rows=9131 width=1119) - Group By Operator [GBY_35] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Group By Operator [GBY_33] (rows=18262 width=1119) - Output:["_col0"],keys:d_month_seq - Select Operator [SEL_32] (rows=18262 width=1119) - Output:["d_month_seq"] - Filter Operator [FIL_106] (rows=18262 width=1119) - predicate:((d_year = 2000) and (d_moy = 2)) - TableScan [TS_30] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_63] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_113] (rows=696954748 width=88) - Conds:RS_60._col8=RS_61._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col12"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_61] + Filter Operator [FIL_83] (rows=281105093 width=88) + predicate:(_col10 > (1.2 * CASE WHEN (_col17 is null) THEN (null) ELSE (_col16) END)) + Merge Join Operator [MERGEJOIN_133] (rows=843315281 width=88) + Conds:RS_80._col11=RS_81._col2(Left Outer),Output:["_col1","_col10","_col16","_col17"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_81] + PartitionCols:_col2 + Select Operator [SEL_79] (rows=115500 width=1436) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_78] (rows=115500 width=1436) + Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + <-Map 23 [SIMPLE_EDGE] + SHUFFLE [RS_77] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_110] (rows=88000001 width=860) - Conds:RS_19._col1=RS_20._col0(Inner),Output:["_col0","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=80000000 width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_103] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_13] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_104] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_16] (rows=40000000 width=1014) - default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_109] (rows=633595212 width=88) - Conds:RS_57._col0=RS_58._col1(Inner),Output:["_col1","_col3","_col4","_col6","_col8"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col1 - Select Operator [SEL_12] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_102] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) - TableScan [TS_10] (rows=575995635 width=88) - default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_57] + Group By Operator [GBY_76] (rows=231000 width=1436) + Output:["_col0","_col1"],aggregations:["avg(i_current_price)"],keys:i_category + Select Operator [SEL_75] (rows=231000 width=1436) + Output:["i_category","i_current_price"] + Filter Operator [FIL_125] (rows=231000 width=1436) + predicate:(i_category = i_category) + TableScan [TS_73] (rows=462000 width=1436) + default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col11 + Select Operator [SEL_72] (rows=766650239 width=88) + Output:["_col1","_col10","_col11"] + Merge Join Operator [MERGEJOIN_132] (rows=766650239 width=88) + Conds:RS_69._col5=RS_70._col0(Inner),Output:["_col1","_col2","_col11"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0 + Select Operator [SEL_59] (rows=80353 width=1119) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_130] (rows=80353 width=1119) + Conds:RS_56._col0=RS_57._col1(Inner),Output:["_col2"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Select Operator [SEL_52] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_124] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_month_seq is not null) + TableScan [TS_50] (rows=73049 width=1119) + default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_129] (rows=9131 width=1128) + Conds:(Inner),Output:["_col0"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_53] + Group By Operator [GBY_34] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Group By Operator [GBY_32] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_31] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_122] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) + TableScan [TS_29] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 21 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_54] + Select Operator [SEL_49] (rows=1 width=8) + Filter Operator [FIL_48] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_46] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 20 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_45] + Group By Operator [GBY_44] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_42] (rows=9131 width=1119) + Group By Operator [GBY_41] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Group By Operator [GBY_39] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_38] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_123] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + TableScan [TS_36] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_131] (rows=696954748 width=88) + Conds:RS_66._col7=RS_67._col0(Inner),Output:["_col1","_col2","_col5","_col11"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_108] (rows=508200 width=1436) - Conds:RS_54._col2=RS_55._col2(Left Outer),Output:["_col0","_col1","_col3","_col4"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=462000 width=1436) + Merge Join Operator [MERGEJOIN_128] (rows=88000001 width=860) + Conds:RS_25._col1=RS_26._col0(Inner),Output:["_col0","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col1 + Select Operator [SEL_21] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_120] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_19] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_24] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_121] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_22] (rows=40000000 width=1014) + default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_127] (rows=633595212 width=88) + Conds:RS_63._col0=RS_64._col1(Inner),Output:["_col1","_col2","_col5","_col7"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col1 + Select Operator [SEL_18] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_100] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_0] (rows=462000 width=1436) - default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_category"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Select Operator [SEL_9] (rows=115500 width=1436) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=115500 width=1436) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_7] - PartitionCols:_col0 - Group By Operator [GBY_6] (rows=231000 width=1436) - Output:["_col0","_col1"],aggregations:["avg(i_current_price)"],keys:i_category - Select Operator [SEL_5] (rows=231000 width=1436) - Output:["i_category","i_current_price"] - Filter Operator [FIL_101] (rows=231000 width=1436) - predicate:(i_category = i_category) - TableScan [TS_3] (rows=462000 width=1436) - default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] + Filter Operator [FIL_119] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) + TableScan [TS_16] (rows=575995635 width=88) + default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_126] (rows=508200 width=1436) + Conds:RS_60._col2=RS_61._col0(Left Outer),Output:["_col0","_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_117] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_0] (rows=462000 width=1436) + default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_category"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0 + Select Operator [SEL_15] (rows=19250 width=1436) + Output:["_col0"] + Filter Operator [FIL_14] (rows=19250 width=1436) + predicate:(sq_count_check(_col1) <= 1) + Group By Operator [GBY_13] (rows=57750 width=1436) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_8] (rows=115500 width=1436) + Output:["_col0"],keys:KEY._col0 + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col0 + Group By Operator [GBY_6] (rows=231000 width=1436) + Output:["_col0"],keys:i_category + Filter Operator [FIL_118] (rows=231000 width=1436) + predicate:(i_category = i_category) + TableScan [TS_3] (rows=462000 width=1436) + default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_category"] diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out b/ql/src/test/results/clientpositive/perf/query81.q.out index 25bd68e..8234780 100644 --- a/ql/src/test/results/clientpositive/perf/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/query81.q.out @@ -59,163 +59,228 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 16 <- Map 19 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 22 <- Map 25 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 - File Output Operator [FS_67] - Limit [LIM_66] (rows=100 width=860) + Reducer 5 + File Output Operator [FS_101] + Limit [LIM_100] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_65] (rows=32266667 width=860) + Select Operator [SEL_99] (rows=35493335 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_64] - Select Operator [SEL_63] (rows=32266667 width=860) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_98] + Select Operator [SEL_97] (rows=35493335 width=860) Output:["_col0","_col1","_col11","_col12","_col13","_col14","_col15","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_62] (rows=32266667 width=860) - predicate:(_col2 > CASE WHEN (_col22 is null) THEN (null) ELSE (_col21) END) - Select Operator [SEL_61] (rows=96800003 width=860) - Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col16","_col18","_col19","_col20","_col21","_col22"] - Merge Join Operator [MERGEJOIN_105] (rows=96800003 width=860) - Conds:RS_58._col0=RS_59._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21","_col22"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_104] (rows=24200000 width=1014) - Conds:RS_51._col1=RS_52._col2(Left Outer),Output:["_col0","_col2","_col3","_col4"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col2 - Select Operator [SEL_50] (rows=8711661 width=106) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_49] (rows=8711661 width=106) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 - Select Operator [SEL_45] (rows=17423323 width=106) - Output:["_col0","_col2"] - Group By Operator [GBY_44] (rows=17423323 width=106) + Filter Operator [FIL_96] (rows=35493335 width=860) + predicate:(_col2 > CASE WHEN (_col24 is null) THEN (null) ELSE (_col23) END) + Merge Join Operator [MERGEJOIN_153] (rows=106480005 width=860) + Conds:RS_93._col1=RS_94._col2(Left Outer),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col16","_col18","_col19","_col20","_col23","_col24"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col2 + Select Operator [SEL_92] (rows=8711661 width=106) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_91] (rows=8711661 width=106) + Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 + Select Operator [SEL_87] (rows=17423323 width=106) + Output:["_col0","_col2"] + Group By Operator [GBY_86] (rows=17423323 width=106) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col0 + Group By Operator [GBY_84] (rows=34846646 width=106) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_83] (rows=34846646 width=106) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_150] (rows=34846646 width=106) + Conds:RS_80._col2=RS_81._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 25 [SIMPLE_EDGE] + SHUFFLE [RS_81] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_143] (rows=20000000 width=1014) + predicate:((ca_state = ca_state) and ca_address_sk is not null) + TableScan [TS_74] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_149] (rows=31678769 width=106) + Conds:RS_77._col0=RS_78._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_141] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) + TableScan [TS_68] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Map 24 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col0 + Select Operator [SEL_73] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_142] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_71] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col1 + Select Operator [SEL_67] (rows=96800003 width=860) + Output:["_col1","_col11","_col12","_col13","_col14","_col16","_col18","_col19","_col2","_col20","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_152] (rows=96800003 width=860) + Conds:RS_64._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col19","_col20"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_151] (rows=24200000 width=1014) + Conds:RS_57._col1=RS_58._col0(Left Outer),Output:["_col0","_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Select Operator [SEL_25] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_24] (rows=22000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Group By Operator [GBY_42] (rows=34846646 width=106) + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=44000000 width=1014) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Select Operator [SEL_41] (rows=34846646 width=106) + Select Operator [SEL_21] (rows=44000000 width=1014) Output:["_col7","_col1","_col3"] - Merge Join Operator [MERGEJOIN_103] (rows=34846646 width=106) - Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_39] + Merge Join Operator [MERGEJOIN_146] (rows=44000000 width=1014) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_34] (rows=20000000 width=1014) + Select Operator [SEL_14] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_98] (rows=20000000 width=1014) - predicate:((ca_state = ca_state) and ca_address_sk is not null) - TableScan [TS_32] (rows=40000000 width=1014) + Filter Operator [FIL_137] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_12] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_38] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_102] (rows=31678769 width=106) - Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Select Operator [SEL_28] (rows=28798881 width=106) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_96] (rows=28798881 width=106) - predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) - TableScan [TS_26] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_36] + Merge Join Operator [MERGEJOIN_145] (rows=31678769 width=106) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_31] (rows=36524 width=1119) + Select Operator [SEL_11] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_97] (rows=36524 width=1119) + Filter Operator [FIL_136] (rows=36524 width=1119) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_29] (rows=73049 width=1119) + TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Select Operator [SEL_25] (rows=22000000 width=1014) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_24] (rows=22000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=44000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Select Operator [SEL_21] (rows=44000000 width=1014) - Output:["_col7","_col1","_col3"] - Merge Join Operator [MERGEJOIN_101] (rows=44000000 width=1014) - Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_95] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_100] (rows=31678769 width=106) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_94] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_15] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_135] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) + TableScan [TS_6] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=1451943 width=106) + Output:["_col0"] + Filter Operator [FIL_55] (rows=1451943 width=106) + predicate:(sq_count_check(_col1) <= 1) + Group By Operator [GBY_54] (rows=4355830 width=106) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Group By Operator [GBY_49] (rows=8711661 width=106) + Output:["_col0"],keys:_col0 + Select Operator [SEL_45] (rows=17423323 width=106) + Output:["_col0"] + Group By Operator [GBY_44] (rows=17423323 width=106) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col0 - Select Operator [SEL_8] (rows=28798881 width=106) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_93] (rows=28798881 width=106) - predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) - TableScan [TS_6] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_99] (rows=88000001 width=860) - Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_91] (rows=80000000 width=860) - predicate:(c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] - <-Map 5 [SIMPLE_EDGE] - SHUFFLE [RS_56] + Group By Operator [GBY_42] (rows=34846646 width=106) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_41] (rows=34846646 width=106) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_148] (rows=34846646 width=106) + Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_140] (rows=20000000 width=1014) + predicate:((ca_state = ca_state) and ca_address_sk is not null) + TableScan [TS_32] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_147] (rows=31678769 width=106) + Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_138] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) + TableScan [TS_26] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_139] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_29] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_5] (rows=20000000 width=1014) - Output:["_col0","_col1","_col10","_col11","_col2","_col3","_col4","_col5","_col6","_col8","_col9"] - Filter Operator [FIL_92] (rows=20000000 width=1014) - predicate:((ca_state = 'IL') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + Merge Join Operator [MERGEJOIN_144] (rows=88000001 width=860) + Conds:RS_61._col2=RS_62._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_133] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0","_col1","_col10","_col11","_col2","_col3","_col4","_col5","_col6","_col8","_col9"] + Filter Operator [FIL_134] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"]