diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cc6239c313..1d5da22d0d 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1631,7 +1631,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal + " expressed as multiple of Local FS read cost"), HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true, "Toggle display of CBO warnings like missing column stats"), - HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS("hive.cbo.stats.correlated.multi.key.joins", false, + HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS("hive.cbo.stats.correlated.multi.key.joins", true, "When CBO estimates output rows for a join involving multiple columns, the default behavior assumes" + "the columns are independent. Setting this flag to true will cause the estimator to assume" + "the columns are correlated."), diff --git a/ql/src/test/results/clientpositive/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/join_alt_syntax.q.out index 454b117de4..6ed2a5bfee 100644 --- a/ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -384,9 +384,9 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -404,33 +404,35 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -445,11 +447,59 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -471,10 +521,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -482,54 +532,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -552,9 +554,9 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -572,33 +574,35 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -613,11 +617,59 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -639,10 +691,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -650,54 +702,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out index 93161e6c77..5c8f705b3b 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out @@ -154,31 +154,31 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p2 filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) @@ -191,18 +191,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -217,54 +217,59 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) + key expressions: _col9 (type: int), _col10 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + 0 _col9 (type: int), _col10 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -289,15 +294,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out index 4e969afa4a..f950dfb2ed 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out @@ -158,31 +158,31 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p2 filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) @@ -195,18 +195,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -221,54 +221,59 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) + key expressions: _col9 (type: int), _col10 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + 0 _col9 (type: int), _col10 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -293,15 +298,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out index c9017d04fd..a0f74ea647 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out @@ -202,48 +202,48 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p2 + filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p2 - filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) + predicate: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -256,37 +256,37 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -299,11 +299,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col18 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -326,17 +326,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col18 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out index f22be61988..2fd43bc4b0 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out @@ -206,48 +206,48 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p2 + filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p2 - filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) + predicate: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -260,37 +260,37 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -303,11 +303,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col18 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -330,17 +330,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col18 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out index de0eb29f63..ba787524f5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 16 Map Operator Tree: TableScan alias: store @@ -134,37 +134,56 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 438), Reducer 9 (PARTITION-LEVEL SORT, 438) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 481), Reducer 10 (PARTITION-LEVEL SORT, 481) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) - Reducer 4 <- Map 16 (PARTITION-LEVEL SORT, 645), Reducer 3 (PARTITION-LEVEL SORT, 645) - Reducer 5 <- Reducer 4 (GROUP, 704) + Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) + Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) + Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Execution mode: vectorized Map 12 + Map Operator Tree: + TableScan + alias: d3 + filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: store_returns @@ -178,51 +197,51 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: int) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan - alias: d1 - filterExpr: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) + alias: d2 + filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 14 + Map 7 Map Operator Tree: TableScan - alias: d2 - filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + alias: d1 + filterExpr: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 16 + Map 8 Map Operator Tree: TableScan alias: item @@ -242,44 +261,25 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan - alias: d3 - filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized Reducer 10 Reduce Operator Tree: @@ -289,47 +289,31 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col6 (type: int) - sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) Reducer 11 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col7, _col8, _col10, _col16 - input vertices: - 1 Map 15 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col16 (type: string), _col1 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) - outputColumnNames: _col1, _col5, _col9, _col11, _col12, _col14 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col12 (type: int), _col11 (type: int) - sort order: ++ - Map-reduce partition columns: _col12 (type: int), _col11 (type: int) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: int), _col9 (type: int), _col14 (type: int) - Reducer 2 + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col7, _col8, _col9, _col10 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int) + sort order: +++ + Map-reduce partition columns: _col7 (type: int), _col8 (type: int), _col9 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col10 (type: int) + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -337,57 +321,85 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) - Reducer 3 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int) - 1 _col12 (type: int), _col11 (type: int) - outputColumnNames: _col3, _col7, _col11, _col15, _col20 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col11 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col11 (type: int) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col7 (type: string), _col15 (type: int), _col20 (type: int) - Reducer 4 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col11 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col7, _col15, _col20, _col24, _col25 - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col24 (type: string), _col25 (type: string), _col7 (type: string), _col15 (type: int), _col20 (type: int), _col3 (type: int), UDFToDouble(_col15) (type: double), (UDFToDouble(_col15) * UDFToDouble(_col15)) (type: double), UDFToDouble(_col20) (type: double), (UDFToDouble(_col20) * UDFToDouble(_col20)) (type: double), UDFToDouble(_col3) (type: double), (UDFToDouble(_col3) * UDFToDouble(_col3)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col3), sum(_col3), sum(_col7), sum(_col6), count(_col4), sum(_col4), sum(_col9), sum(_col8), count(_col5), sum(_col5), sum(_col11), sum(_col10) - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: double), _col14 (type: double) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col9, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int), _col9 (type: string), _col10 (type: string) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col7 (type: int), _col8 (type: int), _col9 (type: int) + outputColumnNames: _col3, _col5, _col9, _col10, _col14, _col21 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col9, _col10, _col14, _col21, _col25 + input vertices: + 1 Map 16 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: string), _col10 (type: string), _col25 (type: string), _col5 (type: int), _col21 (type: int), _col14 (type: int), UDFToDouble(_col5) (type: double), (UDFToDouble(_col5) * UDFToDouble(_col5)) (type: double), UDFToDouble(_col21) (type: double), (UDFToDouble(_col21) * UDFToDouble(_col21)) (type: double), UDFToDouble(_col14) (type: double), (UDFToDouble(_col14) * UDFToDouble(_col14)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col3), sum(_col3), sum(_col7), sum(_col6), count(_col4), sum(_col4), sum(_col9), sum(_col8), count(_col5), sum(_col5), sum(_col11), sum(_col10) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: double), _col14 (type: double) Reducer 5 Execution mode: vectorized Reduce Operator Tree: @@ -396,15 +408,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint), (UDFToDouble(_col4) / _col3) (type: double), power(((_col5 - ((_col6 * _col6) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (power(((_col5 - ((_col6 * _col6) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col4) / _col3)) (type: double), _col7 (type: bigint), (UDFToDouble(_col8) / _col7) (type: double), power(((_col9 - ((_col10 * _col10) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (power(((_col9 - ((_col10 * _col10) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) / (UDFToDouble(_col8) / _col7)) (type: double), _col11 (type: bigint), (UDFToDouble(_col12) / _col11) (type: double), (power(((_col13 - ((_col14 * _col14) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END), 0.5) / (UDFToDouble(_col12) / _col11)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Reducer 6 @@ -413,7 +425,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: bigint), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col10 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -424,22 +436,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 457d662b82..4e2e8e7cf6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -123,7 +123,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 20 + Map 19 Map Operator Tree: TableScan alias: store @@ -147,9 +147,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 19 (PARTITION-LEVEL SORT, 432) - Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009) - Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 534), Reducer 14 (PARTITION-LEVEL SORT, 534) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 975), Map 20 (PARTITION-LEVEL SORT, 975) + Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 486), Reducer 13 (PARTITION-LEVEL SORT, 486) + Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 564), Reducer 14 (PARTITION-LEVEL SORT, 564) Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 899), Reducer 15 (PARTITION-LEVEL SORT, 899) Reducer 17 <- Reducer 16 (GROUP, 640) Reducer 18 <- Reducer 17 (GROUP, 1) @@ -168,33 +168,26 @@ STAGE PLANS: expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) - Execution mode: vectorized - Map 19 - Map Operator Tree: - TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9 + input vertices: + 1 Map 19 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) Execution mode: vectorized - Map 21 + Local Work: + Map Reduce Local Work + Map 20 Map Operator Tree: TableScan alias: customer @@ -214,7 +207,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 22 + Map 21 Map Operator Tree: TableScan alias: item @@ -234,6 +227,25 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized + Map 22 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 23 Map Operator Tree: TableScan @@ -255,85 +267,73 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized Reducer 13 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4, _col8, _col10, _col11 - input vertices: - 1 Map 20 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col8 (type: string), _col10 (type: string), _col11 (type: string) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col8, _col10, _col11, _col13, _col14, _col15 + outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col8, _col10, _col11, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21 + 0 _col0 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col11 (type: string), _col15 (type: string) + key expressions: _col9 (type: string), _col13 (type: string) sort order: ++ - Map-reduce partition columns: _col11 (type: string), _col15 (type: string) + Map-reduce partition columns: _col9 (type: string), _col13 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col8 (type: string), _col10 (type: string), _col13 (type: string), _col14 (type: string), _col17 (type: decimal(7,2)), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: int) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col11 (type: string), _col15 (type: string) + 0 _col9 (type: string), _col13 (type: string) 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col8, _col10, _col13, _col14, _col17, _col18, _col19, _col20, _col21, _col22 + outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col22 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col22 (type: string), _col17 (type: decimal(7,2)), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: int), _col13 (type: string), _col14 (type: string), _col8 (type: string), _col10 (type: string) + keys: _col11 (type: string), _col12 (type: string), _col6 (type: string), _col8 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int), _col22 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col10 (type: decimal(17,2)) Reducer 17 @@ -341,7 +341,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: decimal(7,2)), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: int), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE @@ -381,7 +381,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan alias: store @@ -405,9 +405,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 564), Reducer 3 (PARTITION-LEVEL SORT, 564) Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899) Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640) #### A masked pattern was here #### @@ -426,31 +426,30 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 10 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 11 Map Operator Tree: @@ -473,25 +472,6 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized Map 7 - Map Operator Tree: - TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 Map Operator Tree: TableScan alias: item @@ -511,23 +491,27 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) - Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -537,25 +521,25 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col8, _col9, _col11, _col12 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col9, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col8, _col9, _col11, _col12, _col14, _col16, _col17 + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15 input vertices: - 1 Map 9 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 1 Map 8 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: string), _col11 (type: string), _col12 (type: int), _col14 (type: string), _col16 (type: string), _col17 (type: string) - Reducer 4 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -563,27 +547,43 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col8, _col9, _col11, _col12, _col14, _col16, _col17, _col19, _col20, _col21 + outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col17 (type: string), _col21 (type: string) + key expressions: _col15 (type: string), _col19 (type: string) sort order: ++ - Map-reduce partition columns: _col17 (type: string), _col21 (type: string) + Map-reduce partition columns: _col15 (type: string), _col19 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: string), _col11 (type: string), _col12 (type: int), _col14 (type: string), _col16 (type: string), _col19 (type: string), _col20 (type: string) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col17 (type: string), _col21 (type: string) + 0 _col15 (type: string), _col19 (type: string) 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col8, _col9, _col11, _col12, _col14, _col16, _col19, _col20, _col22 + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col17, _col18, _col22 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col19 (type: string), _col20 (type: string), _col14 (type: string), _col22 (type: string), _col8 (type: decimal(7,2)), _col9 (type: string), _col11 (type: string), _col12 (type: int), _col16 (type: string) + keys: _col17 (type: string), _col18 (type: string), _col12 (type: string), _col22 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col14 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index 61795c304b..5bee2a361f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -116,7 +116,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 16 Map Operator Tree: TableScan alias: store @@ -140,35 +140,54 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 437), Reducer 9 (PARTITION-LEVEL SORT, 437) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 481), Reducer 10 (PARTITION-LEVEL SORT, 481) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) - Reducer 4 <- Map 16 (PARTITION-LEVEL SORT, 645), Reducer 3 (PARTITION-LEVEL SORT, 645) - Reducer 5 <- Reducer 4 (GROUP, 704) + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 374), Reducer 9 (PARTITION-LEVEL SORT, 374) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 806), Reducer 2 (PARTITION-LEVEL SORT, 806) + Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) + Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) + Execution mode: vectorized + Map 11 + Map Operator Tree: + TableScan + alias: d3 + filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 12 Map Operator Tree: @@ -183,31 +202,12 @@ STAGE PLANS: expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)) - Execution mode: vectorized - Map 13 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 14 Map Operator Tree: @@ -228,7 +228,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 16 + Map 15 Map Operator Tree: TableScan alias: item @@ -251,41 +251,41 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: d3 - filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) + alias: d1 + filterExpr: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 8 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Reducer 10 Reduce Operator Tree: @@ -293,48 +293,32 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col8, _col9, _col10, _col11 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col6 (type: int) - sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col10 (type: decimal(7,2)) - Reducer 11 - Local Work: - Map Reduce Local Work + key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) + sort order: +++ + Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col7, _col8, _col10, _col18, _col19 - input vertices: - 1 Map 15 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: string), _col19 (type: string), _col1 (type: int), _col5 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col10 (type: decimal(7,2)) - outputColumnNames: _col1, _col2, _col7, _col11, _col13, _col14, _col16 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col14 (type: int), _col13 (type: int) - sort order: ++ - Map-reduce partition columns: _col14 (type: int), _col13 (type: int) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col7 (type: int), _col11 (type: decimal(7,2)), _col16 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -343,53 +327,65 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)) + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int) - 1 _col14 (type: int), _col13 (type: int) - outputColumnNames: _col3, _col8, _col9, _col14, _col18, _col23 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col8 (type: int), _col9 (type: int), _col10 (type: int) + outputColumnNames: _col1, _col3, _col5, _col12, _col20 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col14 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col14 (type: int) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string), _col18 (type: decimal(7,2)), _col23 (type: decimal(7,2)) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col12 (type: decimal(7,2)), _col20 (type: decimal(7,2)) Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col14 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col8, _col9, _col18, _col23, _col28, _col29 - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col18), sum(_col23), sum(_col3) - keys: _col28 (type: string), _col29 (type: string), _col8 (type: string), _col9 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + outputColumnNames: _col3, _col5, _col12, _col20, _col25, _col26 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col12, _col20, _col25, _col26, _col28, _col29 + input vertices: + 1 Map 16 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col20), sum(_col12) + keys: _col25 (type: string), _col26 (type: string), _col28 (type: string), _col29 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: @@ -398,11 +394,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 6 @@ -411,7 +407,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -428,16 +424,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: decimal(7,2)) + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index b952f35523..2e5c0f3bf5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 16 Map Operator Tree: TableScan alias: store @@ -138,14 +138,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 437), Reducer 9 (PARTITION-LEVEL SORT, 437) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 481), Reducer 10 (PARTITION-LEVEL SORT, 481) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) - Reducer 4 <- Map 16 (PARTITION-LEVEL SORT, 645), Reducer 3 (PARTITION-LEVEL SORT, 645) - Reducer 5 <- Reducer 4 (GROUP, 704) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) + Reducer 10 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) + Reducer 4 <- Reducer 3 (GROUP, 640) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) #### A masked pattern was here #### Vertices: Map 1 @@ -168,27 +168,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: store_returns - filterExpr: (sr_customer_sk is not null and sr_item_sk is not null and sr_ticket_number is not null and sr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: int) - Execution mode: vectorized - Map 13 + Map 11 Map Operator Tree: TableScan alias: d1 @@ -207,6 +187,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 12 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: (sr_customer_sk is not null and sr_item_sk is not null and sr_ticket_number is not null and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Execution mode: vectorized Map 14 Map Operator Tree: TableScan @@ -226,7 +226,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 16 + Map 15 Map Operator Tree: TableScan alias: item @@ -246,7 +246,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: d3 @@ -265,7 +265,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -279,29 +279,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Execution mode: vectorized Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: int) - sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) - Reducer 11 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -309,9 +293,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10 + outputColumnNames: _col3, _col5, _col10, _col11, _col13, _col18, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -319,20 +303,36 @@ STAGE PLANS: keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col7, _col8, _col10, _col18, _col19 + outputColumnNames: _col5, _col10, _col11, _col13, _col18, _col19, _col21, _col22 input vertices: - 1 Map 15 + 1 Map 16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col19 (type: string), _col1 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) - outputColumnNames: _col1, _col2, _col7, _col11, _col13, _col14, _col16 + expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string) + outputColumnNames: _col1, _col2, _col8, _col13, _col14, _col16, _col21, _col22 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col14 (type: int), _col13 (type: int) sort order: ++ Map-reduce partition columns: _col14 (type: int), _col13 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col7 (type: int), _col11 (type: int), _col16 (type: int) + value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string) + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -357,38 +357,22 @@ STAGE PLANS: keys: 0 _col1 (type: int), _col2 (type: int) 1 _col14 (type: int), _col13 (type: int) - outputColumnNames: _col3, _col7, _col8, _col13, _col17, _col22 + outputColumnNames: _col3, _col7, _col8, _col14, _col22, _col27, _col28 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: int) - sort order: + - Map-reduce partition columns: _col13 (type: int) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col7 (type: string), _col8 (type: string), _col17 (type: int), _col22 (type: int) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col13 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col7, _col8, _col17, _col22, _col27, _col28 - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col17), sum(_col22), sum(_col3) - keys: _col27 (type: string), _col28 (type: string), _col7 (type: string), _col8 (type: string) + aggregations: sum(_col14), sum(_col22), sum(_col3) + keys: _col7 (type: string), _col8 (type: string), _col27 (type: string), _col28 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -396,20 +380,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -420,6 +404,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int) Reducer 9 Reduce Operator Tree: Join Operator @@ -428,14 +428,14 @@ STAGE PLANS: keys: 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col5, _col10, _col11, _col13 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query50.q.out b/ql/src/test/results/clientpositive/perf/spark/query50.q.out index 1abdabe7ac..2a84a9274a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query50.q.out @@ -149,7 +149,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -158,34 +158,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 437), Reducer 2 (PARTITION-LEVEL SORT, 437) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 481), Reducer 3 (PARTITION-LEVEL SORT, 481) - Reducer 5 <- Reducer 4 (GROUP, 582) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 36), Map 7 (PARTITION-LEVEL SORT, 36) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 436), Reducer 2 (PARTITION-LEVEL SORT, 436) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 438), Reducer 3 (PARTITION-LEVEL SORT, 438) + Reducer 5 <- Reducer 4 (GROUP, 529) Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) - Execution mode: vectorized - Map 7 Map Operator Tree: TableScan alias: store_returns @@ -199,13 +179,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: d2 @@ -224,6 +204,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) + Execution mode: vectorized Map 9 Map Operator Tree: TableScan @@ -249,32 +249,32 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col3, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col5 (type: int) - sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col5 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int), _col3 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col7, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col7 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int) + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col10 (type: int) Reducer 4 Local Work: Map Reduce Local Work @@ -283,35 +283,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col7 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col5 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col7, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col5, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + outputColumnNames: _col0, _col7, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 10 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col5 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 30) and ((_col5 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 60) and ((_col5 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 90) and ((_col5 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col5 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col0 - _col7) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 30) and ((_col0 - _col7) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 60) and ((_col0 - _col7) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 90) and ((_col0 - _col7) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col0 - _col7) > 120)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) sort order: ++++++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) Reducer 5 @@ -322,11 +322,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) sort order: ++++++++++ - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) Reducer 6 @@ -335,7 +335,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index 84a9dea4fa..f10250f307 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join JOIN[83][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4, $hdt$_5, $hdt$_6]] in Work 'Reducer 5' is a cross product -Warning: Shuffle Join JOIN[114][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 6' is a cross product +Warning: Shuffle Join JOIN[84][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[115][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[145][bigTable=?] in task 'Stage-1:MAPRED' is a cross product Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[143][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -211,11 +211,11 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 22 <- Map 21 (GROUP, 2) - Reducer 23 <- Reducer 22 (GROUP, 1) + Reducer 23 <- Map 22 (GROUP, 2) + Reducer 24 <- Reducer 23 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 21 + Map 22 Map Operator Tree: TableScan alias: date_dim @@ -239,7 +239,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 22 + Reducer 23 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -258,7 +258,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 23 + Reducer 24 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -282,7 +282,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 12 Map Operator Tree: TableScan alias: store @@ -306,23 +306,43 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 458), Map 16 (PARTITION-LEVEL SORT, 458), Map 17 (PARTITION-LEVEL SORT, 458) - Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 505), Reducer 12 (PARTITION-LEVEL SORT, 505) - Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009) - Reducer 15 <- Reducer 14 (GROUP, 610) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 654), Reducer 15 (PARTITION-LEVEL SORT, 654) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 654), Reducer 17 (PARTITION-LEVEL SORT, 654) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 458), Map 18 (PARTITION-LEVEL SORT, 458), Map 19 (PARTITION-LEVEL SORT, 458) + Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 505), Reducer 14 (PARTITION-LEVEL SORT, 505) + Reducer 16 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009) + Reducer 17 <- Reducer 16 (GROUP, 610) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) Reducer 26 <- Map 25 (GROUP, 2) - Reducer 3 <- Map 20 (PARTITION-LEVEL SORT, 733), Reducer 2 (PARTITION-LEVEL SORT, 733) + Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 772), Reducer 2 (PARTITION-LEVEL SORT, 772) Reducer 31 <- Map 30 (GROUP, 2) - Reducer 4 <- Map 24 (PARTITION-LEVEL SORT, 482), Reducer 3 (PARTITION-LEVEL SORT, 482) - Reducer 5 <- Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Reducer 31 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 31 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1009) Reducer 7 <- Reducer 6 (GROUP, 1009) - Reducer 8 <- Reducer 7 (GROUP, 1009) - Reducer 9 <- Reducer 8 (SORT, 1) + Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: customer_address @@ -343,7 +363,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 10 + 1 Map 12 Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -353,7 +373,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 11 + Map 13 Map Operator Tree: TableScan alias: catalog_sales @@ -373,7 +393,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 16 + Map 18 Map Operator Tree: TableScan alias: web_sales @@ -393,7 +413,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 17 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -412,7 +432,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 18 + Map 20 Map Operator Tree: TableScan alias: item @@ -431,7 +451,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 19 + Map 21 Map Operator Tree: TableScan alias: customer @@ -451,46 +471,6 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 20 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) - Execution mode: vectorized - Map 24 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_month_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized Map 25 Map Operator Tree: TableScan @@ -539,7 +519,42 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 12 + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_month_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col5 + Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -555,7 +570,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Reducer 13 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -570,7 +585,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE - Reducer 14 + Reducer 16 Reduce Operator Tree: Join Operator condition map: @@ -590,7 +605,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE - Reducer 15 + Reducer 17 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -615,14 +630,15 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col5 - Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col5 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) Reducer 26 Execution mode: vectorized Reduce Operator Tree: @@ -643,26 +659,24 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col5 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col5, _col7, _col9 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col2, _col4, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 1 - outputColumnNames: _col5, _col7, _col9 + outputColumnNames: _col2, _col4, _col10 input vertices: - 1 Reducer 23 - Statistics: Num rows: 633595212 Data size: 61598310416 Basic stats: COMPLETE Column stats: NONE + 1 Reducer 24 + Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col7 (type: int) - sort order: + - Map-reduce partition columns: _col7 (type: int) - Statistics: Num rows: 633595212 Data size: 61598310416 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int), _col9 (type: decimal(7,2)) + sort order: + Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) Reducer 31 Execution mode: vectorized Reduce Operator Tree: @@ -676,20 +690,6 @@ STAGE PLANS: Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col7 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col9, _col12 - Statistics: Num rows: 696954748 Data size: 67758142926 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 696954748 Data size: 67758142926 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int), _col9 (type: decimal(7,2)), _col12 (type: int) - Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -699,12 +699,12 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col5, _col9, _col12, _col13 - Statistics: Num rows: 6363893803988 Data size: 7746260663523866 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col4, _col10, _col13 + Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: int), _col9 (type: decimal(7,2)), _col12 (type: int), _col13 (type: int) + expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int) outputColumnNames: _col0, _col4, _col11, _col13 - Statistics: Num rows: 6363893803988 Data size: 7746260663523866 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -714,16 +714,16 @@ STAGE PLANS: outputColumnNames: _col0, _col4, _col11, _col13 input vertices: 1 Reducer 29 - Statistics: Num rows: 6363893803988 Data size: 7803535707759758 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6363893803988 Data size: 7803535707759758 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 6363893803988 Data size: 7803535707759758 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(7,2)), _col2 (type: int), _col3 (type: int) - Reducer 6 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -752,7 +752,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 7 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -777,7 +777,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 8 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -796,7 +796,7 @@ STAGE PLANS: Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: int) - Reducer 9 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out index 10a75c64c3..77792ebe27 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 7 Map Operator Tree: TableScan alias: warehouse @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 15 Map Operator Tree: TableScan alias: household_demographics @@ -134,7 +134,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 17 + Map 16 Map Operator Tree: TableScan alias: promotion @@ -154,57 +154,50 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 14 (PARTITION-LEVEL SORT, 306) - Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 338), Reducer 11 (PARTITION-LEVEL SORT, 338) - Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 452), Reducer 12 (PARTITION-LEVEL SORT, 452) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 97), Reducer 8 (PARTITION-LEVEL SORT, 97) - Reducer 3 <- Reducer 2 (GROUP, 80) - Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 496), Reducer 13 (PARTITION-LEVEL SORT, 496) - Reducer 7 <- Map 19 (PARTITION-LEVEL SORT, 181), Reducer 6 (PARTITION-LEVEL SORT, 181) - Reducer 8 <- Map 20 (PARTITION-LEVEL SORT, 199), Reducer 7 (PARTITION-LEVEL SORT, 199) + Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 338), Reducer 9 (PARTITION-LEVEL SORT, 338) + Reducer 11 <- Map 17 (PARTITION-LEVEL SORT, 452), Reducer 10 (PARTITION-LEVEL SORT, 452) + Reducer 12 <- Map 18 (PARTITION-LEVEL SORT, 492), Reducer 11 (PARTITION-LEVEL SORT, 492) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 186), Reducer 12 (PARTITION-LEVEL SORT, 186) + Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67) + Reducer 4 <- Map 20 (PARTITION-LEVEL SORT, 97), Reducer 3 (PARTITION-LEVEL SORT, 97) + Reducer 5 <- Reducer 4 (GROUP, 80) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: cr_item_sk is not null (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cr_item_sk is not null (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cr_item_sk (type: int), cr_order_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: inventory + filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5 + input vertices: + 1 Map 7 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: string) Execution mode: vectorized - Map 14 + Local Work: + Map Reduce Local Work + Map 13 Map Operator Tree: TableScan alias: d1 @@ -224,7 +217,7 @@ STAGE PLANS: Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int) Execution mode: vectorized - Map 15 + Map 14 Map Operator Tree: TableScan alias: customer_demographics @@ -243,7 +236,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 18 + Map 17 Map Operator Tree: TableScan alias: item @@ -263,6 +256,26 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map 18 + Map Operator Tree: + TableScan + alias: d3 + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized Map 19 Map Operator Tree: TableScan @@ -285,72 +298,43 @@ STAGE PLANS: Map 20 Map Operator Tree: TableScan - alias: d3 - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: cr_item_sk is not null (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: cr_item_sk is not null (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: cr_item_sk (type: int), cr_order_number (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 + Map 8 Map Operator Tree: TableScan - alias: inventory - filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5 - input vertices: - 1 Map 9 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: string) + expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int) - Reducer 12 + Reducer 10 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -370,7 +354,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col9, _col10 input vertices: - 1 Map 16 + 1 Map 15 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -380,7 +364,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col16 input vertices: - 1 Map 17 + 1 Map 16 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int) @@ -388,7 +372,7 @@ STAGE PLANS: Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int) - Reducer 13 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -398,43 +382,101 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col16, _col18 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: string), _col1 (type: int), _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int) - outputColumnNames: _col1, _col3, _col6, _col8, _col9, _col11, _col12, _col18 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: int) - sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: int), _col8 (type: int), _col9 (type: int), _col11 (type: string), _col12 (type: int), _col18 (type: int) + value expressions: _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int), _col18 (type: string) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col16, _col18, _col20 + Statistics: Num rows: 510191624 Data size: 69090195216 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0D)) (type: boolean) + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: string), _col4 (type: int), _col6 (type: int), _col7 (type: int), _col10 (type: int), _col16 (type: int) + outputColumnNames: _col3, _col8, _col10, _col11, _col14, _col20 + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string), _col10 (type: int), _col11 (type: int), _col14 (type: int), _col20 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col8 (type: int) + outputColumnNames: _col0, _col3, _col5, _col9, _col14, _col16, _col17, _col20, _col26 + Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 < _col17) (type: boolean) + Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col20 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col20 (type: int) + Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: string), _col9 (type: string), _col14 (type: int), _col16 (type: int), _col26 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col20 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col5, _col9, _col14, _col16, _col20, _col26 + Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col14 (type: int), _col16 (type: int), _col5 (type: string), _col9 (type: string), _col20 (type: int), _col26 (type: int) + outputColumnNames: _col4, _col6, _col13, _col15, _col22, _col28 + Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int), _col6 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: int), _col6 (type: int) + Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col28 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col4 (type: int), _col6 (type: int) - outputColumnNames: _col15, _col17, _col24, _col30 - Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE + 0 _col4 (type: int), _col6 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col13, _col15, _col22, _col28 + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: string), _col15 (type: string), _col24 (type: int), CASE WHEN (_col30 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col30 is not null) THEN (1) ELSE (0) END (type: int) + expressions: _col15 (type: string), _col13 (type: string), _col22 (type: int), CASE WHEN (_col28 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col28 is not null) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col3), count(_col4), count() keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Reducer 3 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -442,20 +484,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 37725837 Data size: 5108836295 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: -+++ - Statistics: Num rows: 37725837 Data size: 5108836295 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 4 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 37725837 Data size: 5108836295 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -466,64 +508,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col6 (type: int) - outputColumnNames: _col0, _col3, _col5, _col7, _col9, _col12, _col14, _col15, _col17, _col18, _col24 - Statistics: Num rows: 510191624 Data size: 69090195216 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 < _col15) (type: boolean) - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col18 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col18 (type: int) - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col7 (type: string), _col9 (type: int), _col12 (type: int), _col14 (type: int), _col17 (type: string), _col24 (type: int) - Reducer 7 + Reducer 9 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col18 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col5, _col7, _col9, _col12, _col14, _col17, _col18, _col24 - Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col9 (type: int) - Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col7 (type: string), _col12 (type: int), _col14 (type: int), _col17 (type: string), _col18 (type: int), _col24 (type: int) - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col9 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7, _col12, _col14, _col17, _col18, _col24, _col28 - Statistics: Num rows: 205777295 Data size: 27866379834 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col28) > (UDFToDouble(_col17) + 5.0D)) (type: boolean) - Statistics: Num rows: 68592431 Data size: 9288793187 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: int), _col14 (type: int), _col5 (type: string), _col7 (type: string), _col18 (type: int), _col24 (type: int) - outputColumnNames: _col4, _col6, _col13, _col15, _col22, _col28 - Statistics: Num rows: 68592431 Data size: 9288793187 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int), _col6 (type: int) - sort order: ++ - Map-reduce partition columns: _col4 (type: int), _col6 (type: int) - Statistics: Num rows: 68592431 Data size: 9288793187 Basic stats: COMPLETE Column stats: NONE - value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col28 (type: int) + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index 139a32be44..3ec14ed61d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -190,7 +190,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 + Map 14 Map Operator Tree: TableScan alias: web_page @@ -205,12 +205,12 @@ STAGE PLANS: Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col10 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 12 + Map 15 Map Operator Tree: TableScan alias: reason @@ -225,7 +225,7 @@ STAGE PLANS: Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col13 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -234,16 +234,36 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 62), Map 9 (PARTITION-LEVEL SORT, 62) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 57), Reducer 2 (PARTITION-LEVEL SORT, 57) - Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 81), Reducer 3 (PARTITION-LEVEL SORT, 81) - Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 13), Reducer 4 (PARTITION-LEVEL SORT, 13) - Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 167), Reducer 5 (PARTITION-LEVEL SORT, 167) - Reducer 7 <- Reducer 6 (GROUP, 59) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 52), Map 9 (PARTITION-LEVEL SORT, 52) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 68), Reducer 3 (PARTITION-LEVEL SORT, 68) + Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 12), Reducer 4 (PARTITION-LEVEL SORT, 12) + Reducer 6 <- Map 13 (PARTITION-LEVEL SORT, 165), Reducer 5 (PARTITION-LEVEL SORT, 165) + Reducer 7 <- Reducer 6 (GROUP, 71) Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: web_sales + filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: web_returns @@ -263,26 +283,7 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 13 + Map 11 Map Operator Tree: TableScan alias: cd1 @@ -302,7 +303,7 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 14 + Map 12 Map Operator Tree: TableScan alias: cd2 @@ -321,7 +322,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 + Map 13 Map Operator Tree: TableScan alias: customer_address @@ -344,22 +345,21 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: web_sales - filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -367,118 +367,114 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col10 (type: int), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col8 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col10, _col12, _col13, _col14 + 0 _col1 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col5 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col10, _col11, _col12, _col13, _col15, _col16 Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col10 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col12, _col13, _col14 - input vertices: - 1 Map 11 - Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19 - input vertices: - 1 Map 12 - Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)), _col19 (type: string) + Reduce Output Operator + key expressions: _col10 (type: int) + sort order: + + Map-reduce partition columns: _col10 (type: int) + Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19, _col21, _col22 - Statistics: Num rows: 77305913 Data size: 10511369184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col4, _col5, _col6, _col11, _col12, _col13, _col15, _col16, _col18, _col19 + Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) (type: boolean) - Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200)) (type: boolean) + Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), _col21 (type: string), _col22 (type: string) + key expressions: _col12 (type: int), _col18 (type: string), _col19 (type: string) sort order: +++ - Map-reduce partition columns: _col3 (type: int), _col21 (type: string), _col22 (type: string) - Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) + Map-reduce partition columns: _col12 (type: int), _col18 (type: string), _col19 (type: string) + Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int), _col21 (type: string), _col22 (type: string) + 0 _col12 (type: int), _col18 (type: string), _col19 (type: string) 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col2, _col6, _col7, _col12, _col14, _col19 - Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col4, _col6, _col11, _col13, _col15, _col16 + Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col11 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) Reducer 6 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col11 (type: int) 1 _col0 (type: int) - outputColumnNames: _col6, _col7, _col12, _col14, _col19, _col27 + outputColumnNames: _col2, _col4, _col6, _col13, _col15, _col16, _col24 Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col27 = 'KY') or (_col27 = 'GA') or (_col27 = 'NM')) and _col14 BETWEEN 100 AND 200) or (((_col27 = 'MT') or (_col27 = 'OR') or (_col27 = 'IN')) and _col14 BETWEEN 150 AND 300) or (((_col27 = 'WI') or (_col27 = 'MO') or (_col27 = 'WV')) and _col14 BETWEEN 50 AND 250)) (type: boolean) + predicate: ((((_col24 = 'KY') or (_col24 = 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250)) (type: boolean) Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col19 (type: string) - outputColumnNames: _col6, _col7, _col12, _col19 - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col19 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col13, _col15, _col16 + input vertices: + 1 Map 14 + Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col13 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col15, _col16, _col28 + input vertices: + 1 Map 15 + Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4), count(_col4), sum(_col16), count(_col16), sum(_col15), count(_col15) + keys: _col28 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) Reducer 7 Execution mode: vectorized Reduce Operator Tree: @@ -487,15 +483,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(_col1) / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), substr(_col0, 1, 20) (type: string) outputColumnNames: _col4, _col5, _col6, _col7 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) sort order: ++++ - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 8 Execution mode: vectorized @@ -503,7 +499,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index 60e956ef16..3d6e1da184 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -103,225 +103,224 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) -Reducer 10 <- Map 19 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 7 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 18 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized + Reducer 7 vectorized File Output Operator [FS_270] Limit [LIM_269] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_268] (rows=510205767 width=88) + Select Operator [SEL_268] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 5 [SIMPLE_EDGE] vectorized + <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_267] - Select Operator [SEL_266] (rows=510205767 width=88) + Select Operator [SEL_266] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_265] (rows=510205767 width=88) + Group By Operator [GBY_265] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=1020411534 width=88) + Group By Operator [GBY_49] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_95] (rows=1020411534 width=88) + Top N Key Operator [TNK_93] (rows=843315281 width=88) keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_46] (rows=1020411534 width=88) + Select Operator [SEL_47] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_207] (rows=1020411534 width=88) - Conds:RS_43._col11=RS_256._col0(Inner),Output:["_col3","_col7","_col15","_col20","_col24","_col25"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_256] + Merge Join Operator [MERGEJOIN_213] (rows=843315281 width=88) + Conds:RS_44._col3=RS_251._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] PartitionCols:_col0 - Select Operator [SEL_255] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_254] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_34] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col11 - Merge Join Operator [MERGEJOIN_206] (rows=927646829 width=88) - Conds:RS_40._col1, _col2=RS_41._col12, _col11(Inner),Output:["_col3","_col7","_col11","_col15","_col20"] - <-Reducer 10 [SIMPLE_EDGE] + Select Operator [SEL_250] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_249] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_32] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_212] (rows=766650239 width=88) + Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col7, _col8, _col9 + Merge Join Operator [MERGEJOIN_211] (rows=348467716 width=135) + Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] + <-Reducer 13 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_29] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_210] (rows=63350266 width=77) + Conds:RS_242._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_224] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=73049 width=1119) + Output:["_col0"] + Filter Operator [FIL_216] (rows=73049 width=1119) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] + PartitionCols:_col0 + Select Operator [SEL_241] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_240] (rows=57591150 width=77) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_15] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col2, _col1 + Merge Join Operator [MERGEJOIN_209] (rows=316788826 width=135) + Conds:RS_264._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_222] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=73049 width=1119) + Output:["_col0"] + Filter Operator [FIL_215] (rows=73049 width=1119) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_264] + PartitionCols:_col0 + Select Operator [SEL_263] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_262] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_9] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_245] + Group By Operator [GBY_243] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_109] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_210] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_248] + Group By Operator [GBY_246] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_126] + Group By Operator [GBY_125] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_124] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_210] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_237] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_234] (rows=462000 width=1436) + Output:["_col0"] + Select Operator [SEL_232] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_231] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_228] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_223] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_218] + <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] - PartitionCols:_col12, _col11 - Select Operator [SEL_33] (rows=843315281 width=88) - Output:["_col1","_col5","_col9","_col11","_col12","_col14"] - Merge Join Operator [MERGEJOIN_205] (rows=843315281 width=88) - Conds:RS_30._col3=RS_248._col0(Inner),Output:["_col1","_col5","_col7","_col8","_col10","_col16"] - <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_248] - PartitionCols:_col0 - Select Operator [SEL_247] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_246] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_18] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_204] (rows=766650239 width=88) - Conds:RS_27._col6=RS_214._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col10"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_214] - PartitionCols:_col0 - Select Operator [SEL_210] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_208] (rows=73049 width=1119) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_203] (rows=696954748 width=88) - Conds:RS_24._col0=RS_215._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col10"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_209] (rows=36524 width=1119) - predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_202] (rows=633595212 width=88) - Conds:RS_264._col1, _col2, _col4=RS_228._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] - <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_227] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_226] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_9] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_264] - PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_263] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_262] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_22_store_returns_sr_customer_sk_min) AND DynamicValue(RS_22_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_22_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_store_returns_sr_item_sk_min) AND DynamicValue(RS_22_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_22_store_returns_sr_ticket_number_min) AND DynamicValue(RS_22_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_22_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_245] - Group By Operator [GBY_244] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - Group By Operator [GBY_218] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_216] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_211] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Group By Operator [GBY_238] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_235] - Group By Operator [GBY_232] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_229] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_227] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_241] - Group By Operator [GBY_240] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_236] - Group By Operator [GBY_233] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_230] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_227] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_243] - Group By Operator [GBY_242] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_237] - Group By Operator [GBY_234] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_231] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_227] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_253] - Group By Operator [GBY_252] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_251] - Group By Operator [GBY_250] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_249] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_247] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_260] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] - Group By Operator [GBY_258] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_257] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_255] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_201] (rows=316788826 width=135) - Conds:RS_225._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_208] (rows=696954748 width=88) + Conds:RS_38._col1=RS_233._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_233] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_210] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - PartitionCols:_col0 - Select Operator [SEL_224] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_223] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_38_d3_d_date_sk_min) AND DynamicValue(RS_38_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_222] - Group By Operator [GBY_221] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] - Group By Operator [GBY_217] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_210] + Please refer to the previous Select Operator [SEL_232] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_207] (rows=633595212 width=88) + Conds:RS_259._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_220] + PartitionCols:_col0 + Select Operator [SEL_217] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_214] (rows=36524 width=1119) + predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_259] + PartitionCols:_col0 + Select Operator [SEL_258] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_257] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_244] + Please refer to the previous Group By Operator [GBY_243] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_247] + Please refer to the previous Group By Operator [GBY_246] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Please refer to the previous Group By Operator [GBY_237] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_256] + Group By Operator [GBY_255] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] + Group By Operator [GBY_253] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_252] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_250] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_230] + Group By Operator [GBY_229] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_227] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_221] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_217] diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index dbf0a820e1..5db3c31ea6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -114,32 +114,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) -Map 31 <- Reducer 19 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE) +Map 31 <- Reducer 16 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 31 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 27 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 21 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 30 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 30 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 21 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 24 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 27 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 25 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 30 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -153,13 +153,13 @@ Stage-0 predicate:(_col3 > _col4) Merge Join Operator [MERGEJOIN_290] (rows=231911707 width=321) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_380] Select Operator [SEL_379] (rows=1 width=232) Output:["_col0"] Group By Operator [GBY_378] (rows=1 width=232) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_377] Group By Operator [GBY_376] (rows=1 width=232) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] @@ -167,13 +167,13 @@ Stage-0 Output:["_col10"] Group By Operator [GBY_374] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Group By Operator [GBY_77] (rows=927646829 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col22, _col17, _col18, _col19, _col20, _col21, _col13, _col14, _col8, _col10 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col11, _col12, _col6, _col8, _col15, _col16, _col17, _col18, _col19, _col22 Merge Join Operator [MERGEJOIN_289] (rows=927646829 width=88) - Conds:RS_73._col11, _col15=RS_355._col1, upper(_col2)(Inner),Output:["_col4","_col8","_col10","_col13","_col14","_col17","_col18","_col19","_col20","_col21","_col22"] + Conds:RS_73._col9, _col13=RS_355._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col22"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_355] PartitionCols:_col1, upper(_col2) @@ -183,126 +183,126 @@ Stage-0 predicate:(ca_zip is not null and upper(ca_country) is not null) TableScan [TS_15] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_state","ca_zip","ca_country"] - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_73] - PartitionCols:_col11, _col15 + PartitionCols:_col9, _col13 Merge Join Operator [MERGEJOIN_288] (rows=843315281 width=88) - Conds:RS_70._col0=RS_317._col0(Inner),Output:["_col4","_col8","_col10","_col11","_col13","_col14","_col15","_col17","_col18","_col19","_col20","_col21"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] - PartitionCols:_col0 - Select Operator [SEL_314] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_312] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] - <-Reducer 14 [SIMPLE_EDGE] + Conds:RS_70._col0, _col3=RS_334._col0, _col1(Inner),Output:["_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col0, _col1 + Select Operator [SEL_330] (rows=57591150 width=77) + Output:["_col0","_col1"] + Filter Operator [FIL_329] (rows=57591150 width=77) + predicate:(sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_12] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_70] - PartitionCols:_col0 + PartitionCols:_col0, _col3 Merge Join Operator [MERGEJOIN_287] (rows=766650239 width=88) - Conds:RS_67._col1=RS_341._col0(Inner),Output:["_col0","_col4","_col8","_col10","_col11","_col13","_col14","_col15"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] + Conds:RS_67._col0=RS_297._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_338] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_337] (rows=80000000 width=860) - predicate:(c_birth_country is not null and c_customer_sk is not null) - TableScan [TS_12] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 13 [SIMPLE_EDGE] + Select Operator [SEL_294] (rows=462000 width=1436) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_292] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_67] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_286] (rows=696954748 width=88) - Conds:RS_64._col2=RS_329._col0(Inner),Output:["_col0","_col1","_col4","_col8","_col10","_col11"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + Conds:RS_64._col1=RS_321._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] PartitionCols:_col0 - Select Operator [SEL_326] (rows=852 width=1910) - Output:["_col0","_col1","_col3","_col4"] - Filter Operator [FIL_325] (rows=852 width=1910) - predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] - <-Reducer 12 [SIMPLE_EDGE] + Select Operator [SEL_318] (rows=80000000 width=860) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_317] (rows=80000000 width=860) + predicate:(c_birth_country is not null and c_customer_sk is not null) + TableScan [TS_9] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_64] - PartitionCols:_col2 + PartitionCols:_col1 Merge Join Operator [MERGEJOIN_285] (rows=633595212 width=88) - Conds:RS_373._col0, _col3=RS_296._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_296] - PartitionCols:_col0, _col1 - Select Operator [SEL_292] (rows=57591150 width=77) - Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=57591150 width=77) - predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_3] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] + Conds:RS_373._col2=RS_309._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col8","_col9"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + PartitionCols:_col0 + Select Operator [SEL_306] (rows=852 width=1910) + Output:["_col0","_col1","_col3","_col4"] + Filter Operator [FIL_305] (rows=852 width=1910) + predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) + TableScan [TS_6] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] <-Map 31 [SIMPLE_EDGE] vectorized SHUFFLE [RS_373] - PartitionCols:_col0, _col3 + PartitionCols:_col2 Select Operator [SEL_372] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_371] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_68_customer_c_customer_sk_min) AND DynamicValue(RS_68_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_68_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_62_store_returns_sr_item_sk_min) AND DynamicValue(RS_62_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_62_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_71_item_i_item_sk_min) AND DynamicValue(RS_71_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_71_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_65_store_s_store_sk_min) AND DynamicValue(RS_65_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_65_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_62_store_returns_sr_ticket_number_min) AND DynamicValue(RS_62_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_62_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_65_customer_c_customer_sk_min) AND DynamicValue(RS_65_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_65_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_68_item_i_item_sk_min) AND DynamicValue(RS_68_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_68_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_71_store_returns_sr_item_sk_min) AND DynamicValue(RS_71_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_71_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_62_store_s_store_sk_min) AND DynamicValue(RS_62_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_62_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_71_store_returns_sr_ticket_number_min) AND DynamicValue(RS_71_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_71_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_43] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 19 [BROADCAST_EDGE] vectorized + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_366] + Group By Operator [GBY_365] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] + Group By Operator [GBY_300] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_298] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_294] + <-Reducer 21 [BROADCAST_EDGE] vectorized BROADCAST [RS_362] Group By Operator [GBY_361] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_305] - Group By Operator [GBY_301] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_297] (rows=57591150 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + Group By Operator [GBY_312] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_310] (rows=852 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] - <-Reducer 20 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_306] + <-Reducer 24 [BROADCAST_EDGE] vectorized BROADCAST [RS_364] Group By Operator [GBY_363] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_326] + Group By Operator [GBY_324] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_322] (rows=80000000 width=860) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_318] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_302] (rows=1 width=12) + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] + Group By Operator [GBY_339] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_298] (rows=57591150 width=77) + Select Operator [SEL_335] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] - <-Reducer 23 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_330] + <-Reducer 29 [BROADCAST_EDGE] vectorized BROADCAST [RS_370] Group By Operator [GBY_369] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] - Group By Operator [GBY_320] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_318] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_314] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Group By Operator [GBY_365] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] - Group By Operator [GBY_332] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_330] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_326] - <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_368] - Group By Operator [GBY_367] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] - Group By Operator [GBY_344] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_342] (rows=80000000 width=860) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_344] + Group By Operator [GBY_340] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_336] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_338] + Please refer to the previous Select Operator [SEL_330] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_360] Select Operator [SEL_359] (rows=231911707 width=88) @@ -317,115 +317,115 @@ Stage-0 SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_34] (rows=927646829 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col19, _col20, _col14, _col22, _col8, _col9, _col11, _col12, _col16 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col17, _col18, _col12, _col22, _col6, _col7, _col9, _col10, _col14 Merge Join Operator [MERGEJOIN_284] (rows=927646829 width=88) - Conds:RS_30._col17, _col21=RS_354._col1, upper(_col2)(Inner),Output:["_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col19","_col20","_col22"] + Conds:RS_30._col15, _col19=RS_354._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col17","_col18","_col22"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_354] PartitionCols:_col1, upper(_col2) Please refer to the previous Select Operator [SEL_353] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] - PartitionCols:_col17, _col21 + PartitionCols:_col15, _col19 Merge Join Operator [MERGEJOIN_283] (rows=843315281 width=88) - Conds:RS_27._col1=RS_339._col0(Inner),Output:["_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col17","_col19","_col20","_col21"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_338] + Conds:RS_27._col0, _col3=RS_331._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_330] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] - PartitionCols:_col1 + PartitionCols:_col0, _col3 Merge Join Operator [MERGEJOIN_282] (rows=766650239 width=88) - Conds:RS_24._col2=RS_327._col0(Inner),Output:["_col1","_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col17"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + Conds:RS_24._col1=RS_319._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_326] + Please refer to the previous Select Operator [SEL_318] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] - PartitionCols:_col2 + PartitionCols:_col1 Merge Join Operator [MERGEJOIN_281] (rows=696954748 width=88) - Conds:RS_21._col0=RS_315._col0(Inner),Output:["_col1","_col2","_col4","_col8","_col9","_col11","_col12"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + Conds:RS_21._col2=RS_307._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] PartitionCols:_col0 - Select Operator [SEL_313] (rows=231000 width=1436) - Output:["_col0","_col1","_col2","_col4","_col5"] - Filter Operator [FIL_311] (rows=231000 width=1436) - predicate:((i_color = 'orchid') and i_item_sk is not null) - Please refer to the previous TableScan [TS_6] + Please refer to the previous Select Operator [SEL_306] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] - PartitionCols:_col0 + PartitionCols:_col2 Merge Join Operator [MERGEJOIN_280] (rows=633595212 width=88) - Conds:RS_351._col0, _col3=RS_293._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] + Conds:RS_351._col0=RS_295._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_293] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_292] + SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_293] (rows=231000 width=1436) + Output:["_col0","_col1","_col2","_col4","_col5"] + Filter Operator [FIL_291] (rows=231000 width=1436) + predicate:((i_color = 'orchid') and i_item_sk is not null) + Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_351] - PartitionCols:_col0, _col3 + PartitionCols:_col0 Select Operator [SEL_350] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_349] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_28_customer_c_customer_sk_min) AND DynamicValue(RS_28_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_28_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_19_store_returns_sr_item_sk_min) AND DynamicValue(RS_19_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_19_store_returns_sr_ticket_number_min) AND DynamicValue(RS_19_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_19_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_25_customer_c_customer_sk_min) AND DynamicValue(RS_25_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_25_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_28_store_returns_sr_ticket_number_min) AND DynamicValue(RS_28_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_28_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + BROADCAST [RS_304] + Group By Operator [GBY_303] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] + SHUFFLE [RS_301] Group By Operator [GBY_299] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_294] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_310] - Group By Operator [GBY_309] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_304] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_295] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_324] - Group By Operator [GBY_323] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] - Group By Operator [GBY_319] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_316] (rows=231000 width=1436) + Select Operator [SEL_296] (rows=231000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_313] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_336] - Group By Operator [GBY_335] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_293] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_316] + Group By Operator [GBY_315] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] - Group By Operator [GBY_331] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_313] + Group By Operator [GBY_311] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_328] (rows=852 width=1910) + Select Operator [SEL_308] (rows=852 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_326] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_348] - Group By Operator [GBY_347] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_306] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_328] + Group By Operator [GBY_327] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] - Group By Operator [GBY_343] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_325] + Group By Operator [GBY_323] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_340] (rows=80000000 width=860) + Select Operator [SEL_320] (rows=80000000 width=860) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_318] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_346] + Group By Operator [GBY_345] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_341] + Group By Operator [GBY_337] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_332] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_330] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_348] + Group By Operator [GBY_347] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_342] + Group By Operator [GBY_338] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_333] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_338] + Please refer to the previous Select Operator [SEL_330] diff --git a/ql/src/test/results/clientpositive/perf/tez/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out index 231e09fb92..dc15890b24 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -109,221 +109,220 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) -Reducer 10 <- Map 19 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 7 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 16 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 17 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_268] - Limit [LIM_267] (rows=100 width=88) + Reducer 7 vectorized + File Output Operator [FS_270] + Limit [LIM_269] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_266] (rows=510205767 width=88) + Select Operator [SEL_268] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] - Group By Operator [GBY_264] (rows=510205767 width=88) + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_267] + Group By Operator [GBY_266] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_48] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_47] (rows=1020411534 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col18)","sum(_col23)","sum(_col3)"],keys:_col28, _col29, _col8, _col9 - Top N Key Operator [TNK_94] (rows=1020411534 width=88) - keys:_col28, _col29, _col8, _col9,sort order:++++,top n:100 - Merge Join Operator [MERGEJOIN_206] (rows=1020411534 width=88) - Conds:RS_43._col14=RS_255._col0(Inner),Output:["_col3","_col8","_col9","_col18","_col23","_col28","_col29"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] + Group By Operator [GBY_48] (rows=843315281 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","sum(_col20)","sum(_col12)"],keys:_col25, _col26, _col28, _col29 + Top N Key Operator [TNK_95] (rows=843315281 width=88) + keys:_col25, _col26, _col28, _col29,sort order:++++,top n:100 + Merge Join Operator [MERGEJOIN_214] (rows=843315281 width=88) + Conds:RS_44._col3=RS_252._col0(Inner),Output:["_col5","_col12","_col20","_col25","_col26","_col28","_col29"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_252] PartitionCols:_col0 - Select Operator [SEL_254] (rows=462000 width=1436) + Select Operator [SEL_251] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_253] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_34] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col14 - Merge Join Operator [MERGEJOIN_205] (rows=927646829 width=88) - Conds:RS_40._col1, _col2=RS_41._col14, _col13(Inner),Output:["_col3","_col8","_col9","_col14","_col18","_col23"] - <-Reducer 10 [SIMPLE_EDGE] + Filter Operator [FIL_250] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_32] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_213] (rows=766650239 width=88) + Conds:RS_41._col1=RS_243._col0(Inner),Output:["_col3","_col5","_col12","_col20","_col25","_col26"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] + PartitionCols:_col0 + Select Operator [SEL_242] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_241] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_29] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] - PartitionCols:_col14, _col13 - Select Operator [SEL_33] (rows=843315281 width=88) - Output:["_col1","_col2","_col7","_col11","_col13","_col14","_col16"] - Merge Join Operator [MERGEJOIN_204] (rows=843315281 width=88) - Conds:RS_30._col3=RS_247._col0(Inner),Output:["_col1","_col5","_col7","_col8","_col10","_col18","_col19"] - <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_247] - PartitionCols:_col0 - Select Operator [SEL_246] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_245] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_18] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_203] (rows=766650239 width=88) - Conds:RS_27._col6=RS_213._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col10"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] - PartitionCols:_col0 - Select Operator [SEL_209] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_207] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_202] (rows=696954748 width=88) - Conds:RS_24._col0=RS_214._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col10"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_214] - PartitionCols:_col0 - Select Operator [SEL_210] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_208] (rows=18262 width=1119) - predicate:((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_201] (rows=633595212 width=88) - Conds:RS_263._col1, _col2, _col4=RS_227._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] - <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_227] - PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_226] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_225] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_9] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_263] - PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_262] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_261] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_22_store_returns_sr_customer_sk_min) AND DynamicValue(RS_22_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_22_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_store_returns_sr_item_sk_min) AND DynamicValue(RS_22_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_22_store_returns_sr_ticket_number_min) AND DynamicValue(RS_22_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_22_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] - Group By Operator [GBY_217] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_215] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_210] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_238] - Group By Operator [GBY_237] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_234] - Group By Operator [GBY_231] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_228] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_226] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_240] - Group By Operator [GBY_239] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_235] - Group By Operator [GBY_232] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_229] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_226] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_242] - Group By Operator [GBY_241] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_236] - Group By Operator [GBY_233] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_230] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_226] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_252] - Group By Operator [GBY_251] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_250] - Group By Operator [GBY_249] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_248] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_246] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_260] - Group By Operator [GBY_259] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_258] - Group By Operator [GBY_257] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_256] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_254] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_200] (rows=316788826 width=135) - Conds:RS_224._col0=RS_211._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_211] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_209] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] - PartitionCols:_col0 - Select Operator [SEL_223] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_222] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_38_d3_d_date_sk_min) AND DynamicValue(RS_38_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_221] - Group By Operator [GBY_220] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_218] - Group By Operator [GBY_216] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_212] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_209] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_212] (rows=696954748 width=88) + Conds:RS_38._col1, _col2, _col4=RS_39._col8, _col9, _col10(Inner),Output:["_col1","_col3","_col5","_col12","_col20"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col8, _col9, _col10 + Merge Join Operator [MERGEJOIN_211] (rows=348467716 width=135) + Conds:RS_25._col2, _col1=RS_26._col1, _col2(Inner),Output:["_col3","_col8","_col9","_col10","_col11"] + <-Reducer 13 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_26] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_210] (rows=63350266 width=77) + Conds:RS_234._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_217] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + PartitionCols:_col0 + Select Operator [SEL_233] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_232] (rows=57591150 width=77) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_12] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col2, _col1 + Merge Join Operator [MERGEJOIN_209] (rows=316788826 width=135) + Conds:RS_265._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_223] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_216] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) + Please refer to the previous TableScan [TS_3] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_265] + PartitionCols:_col0 + Select Operator [SEL_264] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_263] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_237] + Group By Operator [GBY_235] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_108] + Group By Operator [GBY_107] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_106] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_210] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_240] + Group By Operator [GBY_238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_123] + Group By Operator [GBY_122] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_121] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_210] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_249] + Group By Operator [GBY_247] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + Group By Operator [GBY_245] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_244] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_242] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_262] + Group By Operator [GBY_261] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_229] + Group By Operator [GBY_227] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_224] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_219] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_208] (rows=633595212 width=88) + Conds:RS_260._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_221] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_215] (rows=18262 width=1119) + predicate:((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_260] + PartitionCols:_col0 + Select Operator [SEL_259] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_258] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_236] + Please refer to the previous Group By Operator [GBY_235] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Please refer to the previous Group By Operator [GBY_238] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_248] + Please refer to the previous Group By Operator [GBY_247] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_257] + Group By Operator [GBY_256] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] + Group By Operator [GBY_254] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_253] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_251] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_228] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_222] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_218] diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out index 8dad8bee64..b291bb4ba3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -107,226 +107,226 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Map 9 <- Reducer 15 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) -Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Map 8 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) +Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 20 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_270] - Limit [LIM_269] (rows=100 width=88) + Reducer 5 vectorized + File Output Operator [FS_260] + Limit [LIM_259] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_268] (rows=510205767 width=88) + Select Operator [SEL_258] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_267] - Group By Operator [GBY_266] (rows=510205767 width=88) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] + Group By Operator [GBY_256] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_48] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_47] (rows=1020411534 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col17)","sum(_col22)","sum(_col3)"],keys:_col27, _col28, _col7, _col8 - Top N Key Operator [TNK_94] (rows=1020411534 width=88) - keys:_col27, _col28, _col7, _col8,sort order:++++,top n:100 - Merge Join Operator [MERGEJOIN_206] (rows=1020411534 width=88) - Conds:RS_43._col13=RS_257._col0(Inner),Output:["_col3","_col7","_col8","_col17","_col22","_col27","_col28"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] - PartitionCols:_col0 - Select Operator [SEL_256] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_255] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_34] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col13 - Merge Join Operator [MERGEJOIN_205] (rows=927646829 width=88) - Conds:RS_40._col1, _col2=RS_41._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col13","_col17","_col22"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col14, _col13 - Select Operator [SEL_33] (rows=843315281 width=88) - Output:["_col1","_col2","_col7","_col11","_col13","_col14","_col16"] - Merge Join Operator [MERGEJOIN_204] (rows=843315281 width=88) - Conds:RS_30._col3=RS_249._col0(Inner),Output:["_col1","_col5","_col7","_col8","_col10","_col18","_col19"] + Group By Operator [GBY_48] (rows=927646829 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7, _col8, _col27, _col28 + Top N Key Operator [TNK_93] (rows=927646829 width=88) + keys:_col7, _col8, _col27, _col28,sort order:++++,top n:100 + Merge Join Operator [MERGEJOIN_205] (rows=927646829 width=88) + Conds:RS_44._col1, _col2=RS_45._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col14, _col13 + Select Operator [SEL_40] (rows=843315281 width=88) + Output:["_col1","_col2","_col8","_col13","_col14","_col16","_col21","_col22"] + Merge Join Operator [MERGEJOIN_204] (rows=843315281 width=88) + Conds:RS_37._col3=RS_247._col0(Inner),Output:["_col5","_col10","_col11","_col13","_col18","_col19","_col21","_col22"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_247] + PartitionCols:_col0 + Select Operator [SEL_246] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_245] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_25] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_203] (rows=766650239 width=88) + Conds:RS_34._col1=RS_239._col0(Inner),Output:["_col3","_col5","_col10","_col11","_col13","_col18","_col19"] <-Map 20 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_249] + PARTITION_ONLY_SHUFFLE [RS_239] PartitionCols:_col0 - Select Operator [SEL_248] (rows=1704 width=1910) + Select Operator [SEL_238] (rows=462000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_247] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_18] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_203] (rows=766650239 width=88) - Conds:RS_27._col6=RS_240._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col10"] - <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_240] - PartitionCols:_col0 - Select Operator [SEL_238] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_236] (rows=36524 width=1119) - predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) - TableScan [TS_15] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_202] (rows=696954748 width=88) - Conds:RS_24._col0=RS_241._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col10"] - <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_241] + Filter Operator [FIL_237] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_22] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_202] (rows=696954748 width=88) + Conds:RS_31._col1, _col2, _col4=RS_32._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col10","_col11","_col13"] + <-Reducer 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_32] + PartitionCols:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_201] (rows=63350266 width=77) + Conds:RS_230._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_239] (rows=18262 width=1119) + Select Operator [SEL_220] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_237] (rows=18262 width=1119) + Filter Operator [FIL_218] (rows=36524 width=1119) + predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) + TableScan [TS_9] (rows=73049 width=1119) + default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + PartitionCols:_col0 + Select Operator [SEL_229] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_228] (rows=57591150 width=77) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_12] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_200] (rows=633595212 width=88) + Conds:RS_255._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_217] (rows=18262 width=1119) predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) - Please refer to the previous TableScan [TS_15] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_24] + Please refer to the previous TableScan [TS_9] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_201] (rows=633595212 width=88) - Conds:RS_265._col1, _col2, _col4=RS_220._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] - <-Map 14 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] - PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_219] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_218] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_9] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] - PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_264] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_263] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_22_store_returns_sr_customer_sk_min) AND DynamicValue(RS_22_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_22_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_store_returns_sr_item_sk_min) AND DynamicValue(RS_22_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_22_store_returns_sr_ticket_number_min) AND DynamicValue(RS_22_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_22_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_230] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_227] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_221] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_233] - Group By Operator [GBY_232] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - Group By Operator [GBY_225] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_222] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_235] - Group By Operator [GBY_234] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_229] - Group By Operator [GBY_226] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_223] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_246] - Group By Operator [GBY_245] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_244] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_242] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_239] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_254] - Group By Operator [GBY_253] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_252] - Group By Operator [GBY_251] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_250] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_248] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_262] - Group By Operator [GBY_261] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_260] - Group By Operator [GBY_259] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_258] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_256] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_200] (rows=316788826 width=135) - Conds:RS_217._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_209] - PartitionCols:_col0 - Select Operator [SEL_208] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_207] (rows=73049 width=1119) - predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] - PartitionCols:_col0 - Select Operator [SEL_216] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_215] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_38_d3_d_date_sk_min) AND DynamicValue(RS_38_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_214] - Group By Operator [GBY_213] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_212] - Group By Operator [GBY_211] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_210] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_208] + Select Operator [SEL_254] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_253] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_32_store_returns_sr_customer_sk_min) AND DynamicValue(RS_32_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_32_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_32_store_returns_sr_item_sk_min) AND DynamicValue(RS_32_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_32_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_35_item_i_item_sk_min) AND DynamicValue(RS_35_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_35_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_38_store_s_store_sk_min) AND DynamicValue(RS_38_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_38_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_32_store_returns_sr_ticket_number_min) AND DynamicValue(RS_32_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_32_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_6] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_227] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_222] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_219] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_232] + Group By Operator [GBY_231] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_121] + Group By Operator [GBY_120] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_119] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_201] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_126] + Group By Operator [GBY_125] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_124] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_201] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_129] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_201] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_244] + Group By Operator [GBY_243] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_242] + Group By Operator [GBY_241] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_240] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_238] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_252] + Group By Operator [GBY_251] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_250] + Group By Operator [GBY_249] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_248] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_246] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_199] (rows=316788826 width=135) + Conds:RS_216._col0=RS_208._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_208] + PartitionCols:_col0 + Select Operator [SEL_207] (rows=73049 width=1119) + Output:["_col0"] + Filter Operator [FIL_206] (rows=73049 width=1119) + predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_214] (rows=287989836 width=135) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_213] + Group By Operator [GBY_212] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_209] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_207] diff --git a/ql/src/test/results/clientpositive/perf/tez/query50.q.out b/ql/src/test/results/clientpositive/perf/tez/query50.q.out index 61c21dd1c3..8cd6f88746 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query50.q.out @@ -127,157 +127,157 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 10 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 12 <- Reducer 10 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_165] - Limit [LIM_164] (rows=100 width=88) + File Output Operator [FS_156] + Limit [LIM_155] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_163] (rows=421657640 width=88) + Select Operator [SEL_154] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] - Group By Operator [GBY_161] (rows=421657640 width=88) + SHUFFLE [RS_153] + Group By Operator [GBY_152] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_29] (rows=843315281 width=88) + Group By Operator [GBY_29] (rows=766650239 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Top N Key Operator [TNK_56] (rows=843315281 width=88) + Top N Key Operator [TNK_56] (rows=766650239 width=88) keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,sort order:++++++++++,top n:100 - Select Operator [SEL_27] (rows=843315281 width=88) + Select Operator [SEL_27] (rows=766650239 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_120] (rows=843315281 width=88) - Conds:RS_24._col3=RS_149._col0(Inner),Output:["_col0","_col5","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + Merge Join Operator [MERGEJOIN_120] (rows=766650239 width=88) + Conds:RS_24._col10=RS_143._col0(Inner),Output:["_col0","_col7","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_148] (rows=1704 width=1910) + Select Operator [SEL_142] (rows=1704 width=1910) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_147] (rows=1704 width=1910) + Filter Operator [FIL_141] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_12] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_119] (rows=766650239 width=88) - Conds:RS_21._col0=RS_141._col0(Inner),Output:["_col0","_col3","_col5"] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_119] (rows=696954748 width=88) + Conds:RS_21._col7=RS_135._col0(Inner),Output:["_col0","_col7","_col10"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_135] PartitionCols:_col0 - Select Operator [SEL_140] (rows=73049 width=1119) + Select Operator [SEL_134] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_139] (rows=73049 width=1119) + Filter Operator [FIL_133] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_9] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_118] (rows=696954748 width=88) - Conds:RS_18._col5=RS_160._col0(Inner),Output:["_col0","_col3","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] - PartitionCols:_col0 - Select Operator [SEL_159] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_158] (rows=18262 width=1119) - predicate:((d_moy = 9) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_118] (rows=633595212 width=88) + Conds:RS_18._col1, _col2, _col3=RS_151._col1, _col2, _col4(Inner),Output:["_col0","_col7","_col10"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_117] (rows=633595212 width=88) - Conds:RS_157._col1, _col2, _col4=RS_123._col1, _col2, _col3(Inner),Output:["_col0","_col3","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_123] - PartitionCols:_col1, _col2, _col3 + PARTITION_ONLY_SHUFFLE [RS_18] + PartitionCols:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_117] (rows=63350266 width=77) + Conds:RS_123._col0=RS_126._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col0 Select Operator [SEL_122] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_121] (rows=57591150 width=77) predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_3] (rows=57591150 width=77) + TableScan [TS_0] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_157] - PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_156] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_155] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_16_store_returns_sr_customer_sk_min) AND DynamicValue(RS_16_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_16_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_16_store_returns_sr_item_sk_min) AND DynamicValue(RS_16_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_16_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_d1_d_date_sk_min) AND DynamicValue(RS_22_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_16_store_returns_sr_ticket_number_min) AND DynamicValue(RS_16_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_16_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_136] - Group By Operator [GBY_135] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_131] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_125] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_122] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_138] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col0 + Select Operator [SEL_125] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_124] (rows=18262 width=1119) + predicate:((d_moy = 9) and (d_year = 2000) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_150] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_149] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_18_store_returns_sr_customer_sk_min) AND DynamicValue(RS_18_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_18_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_18_store_returns_sr_item_sk_min) AND DynamicValue(RS_18_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_d1_d_date_sk_min) AND DynamicValue(RS_22_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_18_store_returns_sr_ticket_number_min) AND DynamicValue(RS_18_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_18_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_6] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_92] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_117] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_140] + Group By Operator [GBY_139] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] Group By Operator [GBY_137] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_132] - Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_126] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_122] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_146] + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_136] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_134] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_148] + Group By Operator [GBY_147] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] Group By Operator [GBY_145] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_142] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_140] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_154] - Group By Operator [GBY_153] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] - Group By Operator [GBY_151] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_150] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_148] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_130] - Group By Operator [GBY_127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_124] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_122] + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_144] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_142] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_128] + Group By Operator [GBY_127] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_84] + Group By Operator [GBY_83] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_82] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_117] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_130] + Group By Operator [GBY_129] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_87] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_117] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index fb70fc9989..1c17d2a53a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product -Warning: Shuffle Join MERGEJOIN[280][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product -Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -133,324 +133,321 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 22 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 15 (CONTAINS) -Map 20 <- Reducer 22 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Union 15 (CONTAINS) -Map 27 <- Reducer 26 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Map 17 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 23 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Union 18 (CONTAINS) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 16 <- Map 21 (SIMPLE_EDGE), Union 15 (SIMPLE_EDGE) -Reducer 17 <- Map 23 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 25 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 19 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 28 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 19 <- Map 24 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE) Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 28 (SIMPLE_EDGE) -Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Map 27 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 34 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 30 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Map 30 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 12 vectorized - File Output Operator [FS_371] - Limit [LIM_370] (rows=100 width=158) + Reducer 10 vectorized + File Output Operator [FS_360] + Limit [LIM_359] (rows=100 width=158) Number of rows:100 - Select Operator [SEL_369] (rows=1614130953450400 width=158) + Select Operator [SEL_358] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_368] - Select Operator [SEL_367] (rows=1614130953450400 width=158) + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_357] + Select Operator [SEL_356] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_366] (rows=1614130953450400 width=158) + Group By Operator [GBY_355] (rows=1614130953450400 width=158) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_365] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_354] PartitionCols:_col0 - Group By Operator [GBY_364] (rows=3228261906900801 width=158) + Group By Operator [GBY_353] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_363] (rows=3228261906900801 width=158) + Select Operator [SEL_352] (rows=3228261906900801 width=158) Output:["_col0"] - Group By Operator [GBY_362] (rows=3228261906900801 width=158) + Group By Operator [GBY_351] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_118] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_119] PartitionCols:_col0 - Group By Operator [GBY_117] (rows=6456523813801603 width=158) + Group By Operator [GBY_118] (rows=6456523813801603 width=158) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_116] (rows=6456523813801603 width=158) + Select Operator [SEL_117] (rows=6456523813801603 width=158) Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=6456523813801603 width=158) + Filter Operator [FIL_116] (rows=6456523813801603 width=158) predicate:_col2 BETWEEN _col3 AND _col4 - Merge Join Operator [MERGEJOIN_281] (rows=58108714324214428 width=158) + Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158) Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_361] - Group By Operator [GBY_360] (rows=9131 width=1119) + <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_350] + Group By Operator [GBY_349] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] PartitionCols:_col0 - Group By Operator [GBY_339] (rows=18262 width=1119) + Group By Operator [GBY_328] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_336] (rows=18262 width=1119) + Select Operator [SEL_325] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_333] (rows=18262 width=1119) + Filter Operator [FIL_322] (rows=18262 width=1119) predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_40] (rows=73049 width=1119) + TableScan [TS_50] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Select Operator [SEL_103] (rows=6363893803988 width=1226) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Select Operator [SEL_104] (rows=6363893803988 width=1226) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_280] (rows=6363893803988 width=1226) + Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1226) Conds:(Inner),Output:["_col0","_col4","_col11","_col13"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_359] - Select Operator [SEL_358] (rows=1 width=8) - Filter Operator [FIL_357] (rows=1 width=8) + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Select Operator [SEL_347] (rows=1 width=8) + Filter Operator [FIL_346] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_356] (rows=1 width=8) + Group By Operator [GBY_345] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_355] - Group By Operator [GBY_354] (rows=1 width=8) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Group By Operator [GBY_343] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_353] (rows=9131 width=1119) - Group By Operator [GBY_352] (rows=9131 width=1119) + Select Operator [SEL_342] (rows=9131 width=1119) + Group By Operator [GBY_341] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] PartitionCols:_col0 - Group By Operator [GBY_338] (rows=18262 width=1119) + Group By Operator [GBY_327] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_335] (rows=18262 width=1119) + Select Operator [SEL_324] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_333] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_100] - Select Operator [SEL_84] (rows=6363893803988 width=1217) + Please refer to the previous Filter Operator [FIL_322] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_101] + Select Operator [SEL_85] (rows=6363893803988 width=1217) Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_279] (rows=6363893803988 width=1217) - Conds:(Left Outer),Output:["_col5","_col9","_col12","_col13"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_345] - Group By Operator [GBY_343] (rows=9131 width=1119) + Merge Join Operator [MERGEJOIN_271] (rows=6363893803988 width=1217) + Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_334] + Group By Operator [GBY_332] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] PartitionCols:_col0 - Group By Operator [GBY_337] (rows=18262 width=1119) + Group By Operator [GBY_326] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_334] (rows=18262 width=1119) + Select Operator [SEL_323] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_333] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_81] - Merge Join Operator [MERGEJOIN_278] (rows=696954748 width=97) - Conds:RS_78._col7=RS_324._col0(Inner),Output:["_col5","_col9","_col12"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] - PartitionCols:_col0 - Select Operator [SEL_323] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_322] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_55] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_78] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_277] (rows=633595212 width=97) - Conds:(Inner),Output:["_col5","_col7","_col9"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_351] - Select Operator [SEL_350] (rows=1 width=8) - Filter Operator [FIL_349] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_348] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_347] - Group By Operator [GBY_346] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_344] (rows=9131 width=1119) - Please refer to the previous Group By Operator [GBY_343] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_75] - Merge Join Operator [MERGEJOIN_276] (rows=633595212 width=88) - Conds:RS_72._col5=RS_332._col1(Inner),Output:["_col5","_col7","_col9"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] + Please refer to the previous Filter Operator [FIL_322] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_82] + Merge Join Operator [MERGEJOIN_270] (rows=696954748 width=97) + Conds:(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_79] + Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88) + Conds:RS_76._col1=RS_77._col5(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135) + Conds:RS_46._col0=RS_321._col1(Inner),Output:["_col5"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014) + Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] + PartitionCols:_col1, _col2 + Select Operator [SEL_296] (rows=40000000 width=1014) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_295] (rows=40000000 width=1014) + predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_300] + PartitionCols:_col0, _col1 + Select Operator [SEL_299] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_298] (rows=1704 width=1910) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_9] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] + <-Reducer 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] PartitionCols:_col1 - Select Operator [SEL_331] (rows=575995635 width=88) + Select Operator [SEL_320] (rows=287491029 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_319] (rows=287491029 width=135) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1 + Group By Operator [GBY_39] (rows=574982058 width=135) + Output:["_col0","_col1"],keys:_col10, _col9 + Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135) + Conds:RS_35._col1=RS_315._col0(Inner),Output:["_col9","_col10"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_315] + PartitionCols:_col0 + Select Operator [SEL_314] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_313] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_26] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135) + Conds:RS_32._col2=RS_309._col0(Inner),Output:["_col1"] + <-Map 26 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_309] + PartitionCols:_col0 + Select Operator [SEL_308] (rows=115500 width=1436) + Output:["_col0"] + Filter Operator [FIL_307] (rows=115500 width=1436) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) + TableScan [TS_23] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135) + Conds:Union 18._col0=RS_303._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_303] + PartitionCols:_col0 + Select Operator [SEL_302] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_301] (rows=18262 width=1119) + predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_20] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Union 18 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] vectorized + Reduce Output Operator [RS_371] + PartitionCols:_col0 + Select Operator [SEL_370] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_369] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_36_customer_c_customer_sk_min) AND DynamicValue(RS_36_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_36_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_274] (rows=287989836 width=135) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_362] + Group By Operator [GBY_361] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_304] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_302] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_365] + Group By Operator [GBY_364] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_312] + Group By Operator [GBY_311] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_310] (rows=115500 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_308] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_318] + Group By Operator [GBY_317] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_316] (rows=80000000 width=860) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_314] + <-Map 23 [CONTAINS] vectorized + Reduce Output Operator [RS_374] + PartitionCols:_col0 + Select Operator [SEL_373] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_372] (rows=144002668 width=135) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_279] (rows=144002668 width=135) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_363] + Please refer to the previous Group By Operator [GBY_361] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_366] + Please refer to the previous Group By Operator [GBY_364] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88) + Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] + PartitionCols:_col0 + Select Operator [SEL_285] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_284] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] + PartitionCols:_col0 + Select Operator [SEL_293] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_330] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_30_customer_c_customer_sk_min) AND DynamicValue(RS_30_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_30_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_79_date_dim_d_date_sk_min) AND DynamicValue(RS_79_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_79_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_37] (rows=575995635 width=88) + Filter Operator [FIL_292] (rows=575995635 width=88) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_321] - Group By Operator [GBY_319] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_315] - Group By Operator [GBY_314] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_313] (rows=80000000 width=860) - Output:["_col0"] - Select Operator [SEL_311] (rows=80000000 width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_310] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_20] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_329] - Group By Operator [GBY_328] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_291] + Group By Operator [GBY_290] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] - Group By Operator [GBY_326] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_325] (rows=73049 width=1119) + Select Operator [SEL_287] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_323] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_72] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_275] (rows=316240138 width=135) - Conds:RS_69._col0=RS_318._col1(Inner),Output:["_col5"] - <-Reducer 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] - PartitionCols:_col1 - Select Operator [SEL_317] (rows=287491029 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_316] (rows=287491029 width=135) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0, _col1 - Group By Operator [GBY_33] (rows=574982058 width=135) - Output:["_col0","_col1"],keys:_col10, _col9 - Merge Join Operator [MERGEJOIN_274] (rows=574982058 width=135) - Conds:RS_29._col1=RS_312._col0(Inner),Output:["_col9","_col10"] - <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_312] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_311] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_273] (rows=522710951 width=135) - Conds:RS_26._col2=RS_306._col0(Inner),Output:["_col1"] - <-Map 23 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - PartitionCols:_col0 - Select Operator [SEL_305] (rows=115500 width=1436) - Output:["_col0"] - Filter Operator [FIL_304] (rows=115500 width=1436) - predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) - TableScan [TS_17] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_272] (rows=475191764 width=135) - Conds:Union 15._col0=RS_300._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_300] - PartitionCols:_col0 - Select Operator [SEL_299] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_298] (rows=18262 width=1119) - predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_14] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Union 15 [SIMPLE_EDGE] - <-Map 14 [CONTAINS] vectorized - Reduce Output Operator [RS_380] - PartitionCols:_col0 - Select Operator [SEL_379] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_378] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_30_customer_c_customer_sk_min) AND DynamicValue(RS_30_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_30_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_27_item_i_item_sk_min) AND DynamicValue(RS_27_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_27_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_24_date_dim_d_date_sk_min) AND DynamicValue(RS_24_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_24_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_282] (rows=287989836 width=135) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_373] - Group By Operator [GBY_372] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] - Group By Operator [GBY_302] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_301] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_299] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_376] - Group By Operator [GBY_375] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_309] - Group By Operator [GBY_308] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_307] (rows=115500 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_305] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_320] - Please refer to the previous Group By Operator [GBY_319] - <-Map 20 [CONTAINS] vectorized - Reduce Output Operator [RS_383] - PartitionCols:_col0 - Select Operator [SEL_382] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_381] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_27_item_i_item_sk_min) AND DynamicValue(RS_27_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_27_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_24_date_dim_d_date_sk_min) AND DynamicValue(RS_24_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_24_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_287] (rows=144002668 width=135) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_374] - Please refer to the previous Group By Operator [GBY_372] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_377] - Please refer to the previous Group By Operator [GBY_375] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_271] (rows=44000000 width=1014) - Conds:RS_294._col1, _col2=RS_297._col0, _col1(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] - PartitionCols:_col1, _col2 - Select Operator [SEL_293] (rows=40000000 width=1014) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_292] (rows=40000000 width=1014) - predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) - TableScan [TS_0] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] - PartitionCols:_col0, _col1 - Select Operator [SEL_296] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_295] (rows=1704 width=1910) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_3] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] + Please refer to the previous Select Operator [SEL_285] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_340] + Select Operator [SEL_339] (rows=1 width=8) + Filter Operator [FIL_338] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_337] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_336] + Group By Operator [GBY_335] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_333] (rows=9131 width=1119) + Please refer to the previous Group By Operator [GBY_332] diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index e04a1c8e1d..402dadefde 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -265,64 +265,64 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 38 <- Reducer 22 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 57 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE), Reducer 70 (BROADCAST_EDGE) -Map 54 <- Reducer 42 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE) -Map 72 <- Reducer 29 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE), Reducer 37 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 49 (BROADCAST_EDGE), Reducer 50 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE), Reducer 68 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) -Map 73 <- Reducer 45 (BROADCAST_EDGE), Reducer 49 (BROADCAST_EDGE), Reducer 64 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) +Map 40 <- Reducer 23 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 62 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE), Reducer 70 (BROADCAST_EDGE) +Map 49 <- Reducer 43 (BROADCAST_EDGE), Reducer 54 (BROADCAST_EDGE), Reducer 55 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) +Map 72 <- Reducer 31 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE), Reducer 39 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE), Reducer 68 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) +Map 73 <- Reducer 45 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE) Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 28 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 12 <- Reducer 30 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 13 <- Map 69 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) -Reducer 17 <- Map 51 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 56 (ONE_TO_ONE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) +Reducer 17 <- Map 46 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 51 (ONE_TO_ONE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 20 <- Map 66 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 35 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 15 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) -Reducer 24 <- Map 51 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 25 <- Reducer 24 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 62 (ONE_TO_ONE_EDGE) -Reducer 27 <- Map 66 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 28 <- Map 35 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Map 61 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 37 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 64 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 15 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) +Reducer 25 <- Map 46 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 57 (ONE_TO_ONE_EDGE) +Reducer 28 <- Map 61 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 37 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 35 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 35 (CUSTOM_SIMPLE_EDGE) -Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 40 <- Map 46 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) -Reducer 42 <- Map 41 (CUSTOM_SIMPLE_EDGE) -Reducer 43 <- Map 41 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) -Reducer 44 <- Map 46 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE) -Reducer 45 <- Map 41 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 64 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 38 <- Map 37 (CUSTOM_SIMPLE_EDGE) +Reducer 39 <- Map 37 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) +Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) +Reducer 43 <- Map 42 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 42 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) +Reducer 45 <- Map 42 (CUSTOM_SIMPLE_EDGE) Reducer 47 <- Map 46 (CUSTOM_SIMPLE_EDGE) Reducer 48 <- Map 46 (CUSTOM_SIMPLE_EDGE) -Reducer 49 <- Map 46 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 35 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Map 46 (CUSTOM_SIMPLE_EDGE) -Reducer 52 <- Map 51 (CUSTOM_SIMPLE_EDGE) -Reducer 53 <- Map 51 (CUSTOM_SIMPLE_EDGE) -Reducer 55 <- Map 54 (SIMPLE_EDGE), Map 58 (SIMPLE_EDGE) -Reducer 56 <- Reducer 55 (SIMPLE_EDGE) -Reducer 57 <- Reducer 56 (CUSTOM_SIMPLE_EDGE) -Reducer 59 <- Map 58 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 37 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 50 <- Map 49 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) +Reducer 51 <- Reducer 50 (SIMPLE_EDGE) +Reducer 52 <- Reducer 51 (CUSTOM_SIMPLE_EDGE) +Reducer 54 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 55 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 56 <- Map 53 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) +Reducer 57 <- Reducer 56 (SIMPLE_EDGE) +Reducer 58 <- Reducer 57 (CUSTOM_SIMPLE_EDGE) +Reducer 59 <- Map 53 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 69 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 60 <- Map 58 (CUSTOM_SIMPLE_EDGE) -Reducer 61 <- Map 58 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) -Reducer 62 <- Reducer 61 (SIMPLE_EDGE) -Reducer 63 <- Reducer 62 (CUSTOM_SIMPLE_EDGE) -Reducer 64 <- Map 58 (CUSTOM_SIMPLE_EDGE) -Reducer 65 <- Map 58 (CUSTOM_SIMPLE_EDGE) -Reducer 67 <- Map 66 (CUSTOM_SIMPLE_EDGE) -Reducer 68 <- Map 66 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 60 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 62 <- Map 61 (CUSTOM_SIMPLE_EDGE) +Reducer 63 <- Map 61 (CUSTOM_SIMPLE_EDGE) +Reducer 65 <- Map 64 (CUSTOM_SIMPLE_EDGE) +Reducer 66 <- Map 64 (CUSTOM_SIMPLE_EDGE) +Reducer 67 <- Map 64 (CUSTOM_SIMPLE_EDGE) +Reducer 68 <- Map 64 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 70 <- Map 69 (CUSTOM_SIMPLE_EDGE) Reducer 71 <- Map 69 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 69 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) @@ -333,8 +333,8 @@ Stage-0 limit:-1 Stage-1 Reducer 11 vectorized - File Output Operator [FS_1281] - Select Operator [SEL_1280] (rows=273897192 width=88) + File Output Operator [FS_1283] + Select Operator [SEL_1282] (rows=273897192 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_259] @@ -342,688 +342,688 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] Filter Operator [FIL_257] (rows=273897192 width=88) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_1085] (rows=821691577 width=88) - Conds:RS_1237._col2, _col1, _col3=RS_1279._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + Merge Join Operator [MERGEJOIN_1087] (rows=821691577 width=88) + Conds:RS_1239._col2, _col1, _col3=RS_1281._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1279] + SHUFFLE [RS_1281] PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_1278] (rows=746992327 width=88) + Select Operator [SEL_1280] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_1277] (rows=746992327 width=88) + Group By Operator [GBY_1279] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_251] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Group By Operator [GBY_250] (rows=1493984654 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col45)","sum(_col46)","sum(_col47)"],keys:_col26, _col48, _col27, _col7, _col9, _col14, _col15, _col16, _col17, _col21, _col22, _col23, _col24, _col51 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col49 Select Operator [SEL_249] (rows=1493984654 width=88) - Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51"] + Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] Filter Operator [FIL_248] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1084] (rows=1493984654 width=88) - Conds:RS_245._col39=RS_1127._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51","_col56"] + Merge Join Operator [MERGEJOIN_1086] (rows=1493984654 width=88) + Conds:RS_245._col37=RS_1129._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1127] + SHUFFLE [RS_1129] PartitionCols:_col0 - Select Operator [SEL_1123] (rows=1861800 width=385) + Select Operator [SEL_1125] (rows=1861800 width=385) Output:["_col0","_col1"] - Filter Operator [FIL_1122] (rows=1861800 width=385) + Filter Operator [FIL_1124] (rows=1861800 width=385) predicate:cd_demo_sk is not null TableScan [TS_97] (rows=1861800 width=385) default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_245] - PartitionCols:_col39 - Merge Join Operator [MERGEJOIN_1083] (rows=1358167838 width=88) - Conds:RS_242._col0=RS_243._col18(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col39","_col45","_col46","_col47","_col48","_col51"] + PartitionCols:_col37 + Merge Join Operator [MERGEJOIN_1085] (rows=1358167838 width=88) + Conds:RS_242._col0=RS_243._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_242] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1054] (rows=128840811 width=860) - Conds:RS_112._col1=RS_1126._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] + Merge Join Operator [MERGEJOIN_1056] (rows=128840811 width=860) + Conds:RS_112._col1=RS_1128._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1126] + SHUFFLE [RS_1128] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1123] + Please refer to the previous Select Operator [SEL_1125] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_112] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1053] (rows=117128008 width=860) - Conds:RS_109._col3=RS_1113._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] - <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1113] + Merge Join Operator [MERGEJOIN_1055] (rows=117128008 width=860) + Conds:RS_109._col3=RS_1115._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1115] PartitionCols:_col0 - Select Operator [SEL_1112] (rows=40000000 width=1014) + Select Operator [SEL_1114] (rows=40000000 width=1014) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1111] (rows=40000000 width=1014) + Filter Operator [FIL_1113] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_19] (rows=40000000 width=1014) default@customer_address,ad2,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_1052] (rows=106480005 width=860) + Merge Join Operator [MERGEJOIN_1054] (rows=106480005 width=860) Conds:RS_106._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] - <-Reducer 31 [SIMPLE_EDGE] + <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1051] (rows=7920 width=107) - Conds:RS_1107._col1=RS_1110._col0(Inner),Output:["_col0"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1107] + Merge Join Operator [MERGEJOIN_1053] (rows=7920 width=107) + Conds:RS_1109._col1=RS_1112._col0(Inner),Output:["_col0"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1109] PartitionCols:_col1 - Select Operator [SEL_1106] (rows=7200 width=107) + Select Operator [SEL_1108] (rows=7200 width=107) Output:["_col0","_col1"] - Filter Operator [FIL_1105] (rows=7200 width=107) + Filter Operator [FIL_1107] (rows=7200 width=107) predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,hd2,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1110] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1112] PartitionCols:_col0 - Select Operator [SEL_1109] (rows=20 width=12) + Select Operator [SEL_1111] (rows=20 width=12) Output:["_col0"] - Filter Operator [FIL_1108] (rows=20 width=12) + Filter Operator [FIL_1110] (rows=20 width=12) predicate:ib_income_band_sk is not null TableScan [TS_12] (rows=20 width=12) default@income_band,ib2,Tbl:COMPLETE,Col:NONE,Output:["ib_income_band_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_1050] (rows=96800003 width=860) - Conds:RS_103._col4=RS_1096._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] + Merge Join Operator [MERGEJOIN_1052] (rows=96800003 width=860) + Conds:RS_103._col4=RS_1098._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1096] + PARTITION_ONLY_SHUFFLE [RS_1098] PartitionCols:_col0 - Select Operator [SEL_1092] (rows=73049 width=1119) + Select Operator [SEL_1094] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_1089] (rows=73049 width=1119) + Filter Operator [FIL_1091] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1049] (rows=88000001 width=860) - Conds:RS_1088._col5=RS_1095._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] + Merge Join Operator [MERGEJOIN_1051] (rows=88000001 width=860) + Conds:RS_1090._col5=RS_1097._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1095] + PARTITION_ONLY_SHUFFLE [RS_1097] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1092] + Please refer to the previous Select Operator [SEL_1094] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1088] + SHUFFLE [RS_1090] PartitionCols:_col5 - Select Operator [SEL_1087] (rows=80000000 width=860) + Select Operator [SEL_1089] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1086] (rows=80000000 width=860) + Filter Operator [FIL_1088] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] - <-Reducer 28 [SIMPLE_EDGE] + <-Reducer 30 [SIMPLE_EDGE] SHUFFLE [RS_243] - PartitionCols:_col18 + PartitionCols:_col16 Select Operator [SEL_223] (rows=1234698008 width=88) - Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col18","_col19","_col25","_col26","_col27","_col28","_col31"] - Merge Join Operator [MERGEJOIN_1082] (rows=1234698008 width=88) - Conds:RS_220._col13=RS_1116._col0(Inner),Output:["_col10","_col11","_col17","_col18","_col19","_col20","_col23","_col28","_col29","_col31","_col32","_col33","_col34"] - <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1116] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1112] - <-Reducer 27 [SIMPLE_EDGE] + Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] + Merge Join Operator [MERGEJOIN_1084] (rows=1234698008 width=88) + Conds:RS_220._col5, _col12=RS_1190._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 64 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1190] + PartitionCols:_col0, _col1 + Select Operator [SEL_1186] (rows=57591150 width=77) + Output:["_col0","_col1"] + Filter Operator [FIL_1185] (rows=57591150 width=77) + predicate:(sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_75] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 29 [SIMPLE_EDGE] SHUFFLE [RS_220] - PartitionCols:_col13 - Merge Join Operator [MERGEJOIN_1081] (rows=1122452711 width=88) - Conds:RS_217._col14=RS_1220._col0(Inner),Output:["_col10","_col11","_col13","_col17","_col18","_col19","_col20","_col23","_col28","_col29"] - <-Map 66 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1220] + PartitionCols:_col5, _col12 + Merge Join Operator [MERGEJOIN_1083] (rows=1122452711 width=88) + Conds:RS_217._col9=RS_1118._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1118] PartitionCols:_col0 - Select Operator [SEL_1217] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1216] (rows=1704 width=1910) - predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) - TableScan [TS_75] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 26 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_1114] + <-Reducer 28 [SIMPLE_EDGE] SHUFFLE [RS_217] - PartitionCols:_col14 - Merge Join Operator [MERGEJOIN_1080] (rows=1020411534 width=88) - Conds:RS_214._col9=RS_1262._col0(Inner),Output:["_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] - <-Reducer 62 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1262] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_1082] (rows=1020411534 width=88) + Conds:RS_214._col10=RS_1220._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] + <-Map 61 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1220] PartitionCols:_col0 - Select Operator [SEL_1261] (rows=52798137 width=135) - Output:["_col0"] - Filter Operator [FIL_1260] (rows=52798137 width=135) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1259] (rows=158394413 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 61 [SIMPLE_EDGE] - SHUFFLE [RS_198] - PartitionCols:_col0 - Group By Operator [GBY_197] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 - Select Operator [SEL_195] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_1078] (rows=316788826 width=135) - Conds:RS_1258._col0, _col1=RS_1173._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] - <-Map 58 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1173] - PartitionCols:_col0, _col1 - Select Operator [SEL_1169] (rows=28798881 width=106) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1168] (rows=28798881 width=106) - predicate:(cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_62] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1258] - PartitionCols:_col0, _col1 - Select Operator [SEL_1257] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1256] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_174_item_i_item_sk_min) AND DynamicValue(RS_174_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_174_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_177_store_returns_sr_item_sk_min) AND DynamicValue(RS_177_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_177_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_193_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_193_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_193_catalog_returns_cr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_193_catalog_returns_cr_order_number_min) AND DynamicValue(RS_193_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_193_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_186] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1240] - Group By Operator [GBY_1238] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 41 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1142] - Group By Operator [GBY_1140] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1138] (rows=5703 width=1436) - Output:["_col0"] - Select Operator [SEL_1134] (rows=5703 width=1436) - Output:["_col0","_col3"] - Filter Operator [FIL_1133] (rows=5703 width=1436) - predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) - TableScan [TS_34] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] - <-Reducer 49 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1243] - Group By Operator [GBY_1241] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1161] - Group By Operator [GBY_1157] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1153] (rows=57591150 width=77) - Output:["_col0"] - Select Operator [SEL_1148] (rows=57591150 width=77) - Output:["_col0","_col1"] - Filter Operator [FIL_1147] (rows=57591150 width=77) - predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_37] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 64 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1253] - Group By Operator [GBY_1252] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1182] - Group By Operator [GBY_1178] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1174] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1169] - <-Reducer 65 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1255] - Group By Operator [GBY_1254] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1183] - Group By Operator [GBY_1179] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1175] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1169] - <-Reducer 25 [SIMPLE_EDGE] + Select Operator [SEL_1217] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1216] (rows=1704 width=1910) + predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) + TableScan [TS_69] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 27 [SIMPLE_EDGE] SHUFFLE [RS_214] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1079] (rows=927646829 width=88) - Conds:RS_211._col0=RS_212._col9(Inner),Output:["_col9","_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_211] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_1081] (rows=927646829 width=88) + Conds:RS_211._col5=RS_1262._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 57 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1262] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1051] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_212] - PartitionCols:_col9 - Select Operator [SEL_185] (rows=843315281 width=88) - Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col14","_col15","_col16","_col17","_col20"] - Merge Join Operator [MERGEJOIN_1077] (rows=843315281 width=88) - Conds:RS_182._col7=RS_1206._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col9","_col10","_col11","_col12","_col15"] - <-Map 51 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1206] - PartitionCols:_col0 - Select Operator [SEL_1203] (rows=2300 width=1179) - Output:["_col0"] - Filter Operator [FIL_1202] (rows=2300 width=1179) - predicate:p_promo_sk is not null - TableScan [TS_43] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_182] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1076] (rows=766650239 width=88) - Conds:RS_179._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1099] + Select Operator [SEL_1261] (rows=52798137 width=135) + Output:["_col0"] + Filter Operator [FIL_1260] (rows=52798137 width=135) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1259] (rows=158394413 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 56 [SIMPLE_EDGE] + SHUFFLE [RS_192] + PartitionCols:_col0 + Group By Operator [GBY_191] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_189] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_1079] (rows=316788826 width=135) + Conds:RS_1258._col0, _col1=RS_1170._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 53 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1170] + PartitionCols:_col0, _col1 + Select Operator [SEL_1166] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_1165] (rows=28798881 width=106) + predicate:(cr_item_sk is not null and cr_order_number is not null) + TableScan [TS_56] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 73 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1258] + PartitionCols:_col0, _col1 + Select Operator [SEL_1257] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1256] (rows=287989836 width=135) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_187_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_187_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_187_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_187_catalog_returns_cr_order_number_min) AND DynamicValue(RS_187_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_187_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_180] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1242] + Group By Operator [GBY_1240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1144] + Group By Operator [GBY_1142] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1140] (rows=5703 width=1436) + Output:["_col0"] + Select Operator [SEL_1136] (rows=5703 width=1436) + Output:["_col0","_col3"] + Filter Operator [FIL_1135] (rows=5703 width=1436) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) + TableScan [TS_34] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Reducer 67 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1255] + Group By Operator [GBY_1253] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1199] + Group By Operator [GBY_1195] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1191] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 59 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1250] + Group By Operator [GBY_1249] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1179] + Group By Operator [GBY_1175] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1171] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1166] + <-Reducer 60 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1252] + Group By Operator [GBY_1251] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1180] + Group By Operator [GBY_1176] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1172] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1166] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_211] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1080] (rows=843315281 width=88) + Conds:RS_208._col0=RS_209._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_208] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_209] + PartitionCols:_col5 + Select Operator [SEL_179] (rows=766650239 width=88) + Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] + Merge Join Operator [MERGEJOIN_1078] (rows=766650239 width=88) + Conds:RS_176._col7=RS_1155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 46 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1155] PartitionCols:_col0 - Select Operator [SEL_1094] (rows=36524 width=1119) + Select Operator [SEL_1152] (rows=2300 width=1179) Output:["_col0"] - Filter Operator [FIL_1091] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 44 [SIMPLE_EDGE] - SHUFFLE [RS_179] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1075] (rows=696954748 width=88) - Conds:RS_176._col1, _col8=RS_1152._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] - <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1152] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1148] - <-Reducer 43 [SIMPLE_EDGE] - SHUFFLE [RS_176] - PartitionCols:_col1, _col8 - Merge Join Operator [MERGEJOIN_1074] (rows=633595212 width=88) - Conds:RS_1276._col1=RS_1137._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 41 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1137] + Filter Operator [FIL_1151] (rows=2300 width=1179) + predicate:p_promo_sk is not null + TableScan [TS_40] (rows=2300 width=1179) + default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_176] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_1077] (rows=696954748 width=88) + Conds:RS_173._col0=RS_1101._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1101] + PartitionCols:_col0 + Select Operator [SEL_1096] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_1093] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_173] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1076] (rows=633595212 width=88) + Conds:RS_1278._col1=RS_1139._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 42 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1139] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1134] + Please refer to the previous Select Operator [SEL_1136] <-Map 72 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1276] + SHUFFLE [RS_1278] PartitionCols:_col1 - Select Operator [SEL_1275] (rows=575995635 width=88) + Select Operator [SEL_1277] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1274] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_221_ad1_ca_address_sk_min) AND DynamicValue(RS_221_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_221_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_246_cd1_cd_demo_sk_min) AND DynamicValue(RS_246_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_246_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_211_hd1_hd_demo_sk_min) AND DynamicValue(RS_211_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_211_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_174_item_i_item_sk_min) AND DynamicValue(RS_174_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_174_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_177_store_returns_sr_item_sk_min) AND DynamicValue(RS_177_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_177_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_215_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_215_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_215_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_183_promotion_p_promo_sk_min) AND DynamicValue(RS_183_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_183_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_180_d1_d_date_sk_min) AND DynamicValue(RS_180_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_180_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_218_store_s_store_sk_min) AND DynamicValue(RS_218_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_218_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_177_store_returns_sr_ticket_number_min) AND DynamicValue(RS_177_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_177_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + Filter Operator [FIL_1276] (rows=575995635 width=88) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_218_ad1_ca_address_sk_min) AND DynamicValue(RS_218_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_218_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_246_cd1_cd_demo_sk_min) AND DynamicValue(RS_246_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_246_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_208_hd1_hd_demo_sk_min) AND DynamicValue(RS_208_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_208_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_212_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_212_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_212_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_177_promotion_p_promo_sk_min) AND DynamicValue(RS_177_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_177_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_174_d1_d_date_sk_min) AND DynamicValue(RS_174_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_174_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_215_store_s_store_sk_min) AND DynamicValue(RS_215_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_215_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_221_store_returns_sr_ticket_number_min) AND DynamicValue(RS_221_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_221_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_158] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 42 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1146] - Group By Operator [GBY_1143] (rows=1 width=12) + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1148] + Group By Operator [GBY_1145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 41 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1141] - Group By Operator [GBY_1139] (rows=1 width=12) + <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1143] + Group By Operator [GBY_1141] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1136] (rows=5703 width=1436) + Select Operator [SEL_1138] (rows=5703 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1134] + Please refer to the previous Select Operator [SEL_1136] <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1239] - Please refer to the previous Group By Operator [GBY_1238] - <-Reducer 49 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1242] - Please refer to the previous Group By Operator [GBY_1241] - <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1247] - Group By Operator [GBY_1246] (rows=1 width=12) + BROADCAST [RS_1241] + Please refer to the previous Group By Operator [GBY_1240] + <-Reducer 67 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1254] + Please refer to the previous Group By Operator [GBY_1253] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1244] + Group By Operator [GBY_1243] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1104] - Group By Operator [GBY_1102] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1106] + Group By Operator [GBY_1104] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1100] (rows=36524 width=1119) + Select Operator [SEL_1102] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1094] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1251] - Group By Operator [GBY_1250] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1096] + <-Reducer 35 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1248] + Group By Operator [GBY_1247] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_916] - Group By Operator [GBY_915] (rows=1 width=12) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_909] + Group By Operator [GBY_908] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_914] (rows=7920 width=107) + Select Operator [SEL_907] (rows=7920 width=107) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1051] - <-Reducer 37 [BROADCAST_EDGE] vectorized + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] + <-Reducer 39 [BROADCAST_EDGE] vectorized BROADCAST [RS_1271] Group By Operator [GBY_1270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1121] - Group By Operator [GBY_1119] (rows=1 width=12) + <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1123] + Group By Operator [GBY_1121] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1117] (rows=40000000 width=1014) + Select Operator [SEL_1119] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1112] - <-Reducer 50 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1245] - Group By Operator [GBY_1244] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + Please refer to the previous Select Operator [SEL_1114] + <-Reducer 48 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1246] + Group By Operator [GBY_1245] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1162] + PARTITION_ONLY_SHUFFLE [RS_1160] Group By Operator [GBY_1158] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1154] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1148] - <-Reducer 53 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1249] - Group By Operator [GBY_1248] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1211] - Group By Operator [GBY_1209] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1207] (rows=2300 width=1179) + Select Operator [SEL_1156] (rows=2300 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1203] - <-Reducer 63 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_1152] + <-Reducer 58 [BROADCAST_EDGE] vectorized BROADCAST [RS_1267] Group By Operator [GBY_1266] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] - <-Reducer 62 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 57 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1265] Group By Operator [GBY_1264] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] Select Operator [SEL_1263] (rows=52798137 width=135) Output:["_col0"] Please refer to the previous Select Operator [SEL_1261] - <-Reducer 68 [BROADCAST_EDGE] vectorized + <-Reducer 63 [BROADCAST_EDGE] vectorized BROADCAST [RS_1269] Group By Operator [GBY_1268] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 66 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1225] Group By Operator [GBY_1223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_1221] (rows=1704 width=1910) Output:["_col0"] Please refer to the previous Select Operator [SEL_1217] - <-Reducer 71 [BROADCAST_EDGE] vectorized + <-Reducer 68 [BROADCAST_EDGE] vectorized BROADCAST [RS_1273] Group By Operator [GBY_1272] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1200] + Group By Operator [GBY_1196] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1192] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 71 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1275] + Group By Operator [GBY_1274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1132] - Group By Operator [GBY_1130] (rows=1 width=12) + SHUFFLE [RS_1134] + Group By Operator [GBY_1132] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1128] (rows=1861800 width=385) + Select Operator [SEL_1130] (rows=1861800 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1123] + Please refer to the previous Select Operator [SEL_1125] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1237] + SHUFFLE [RS_1239] PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_1236] (rows=746992327 width=88) + Select Operator [SEL_1238] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Group By Operator [GBY_1235] (rows=746992327 width=88) + Group By Operator [GBY_1237] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_124] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Group By Operator [GBY_123] (rows=1493984654 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col45)","sum(_col46)","sum(_col47)"],keys:_col26, _col48, _col27, _col7, _col9, _col14, _col15, _col16, _col17, _col21, _col22, _col23, _col24, _col51 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col49 Select Operator [SEL_122] (rows=1493984654 width=88) - Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51"] + Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] Filter Operator [FIL_121] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1066] (rows=1493984654 width=88) - Conds:RS_118._col39=RS_1124._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51","_col56"] + Merge Join Operator [MERGEJOIN_1068] (rows=1493984654 width=88) + Conds:RS_118._col37=RS_1126._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1124] + SHUFFLE [RS_1126] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1123] + Please refer to the previous Select Operator [SEL_1125] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_118] - PartitionCols:_col39 - Merge Join Operator [MERGEJOIN_1065] (rows=1358167838 width=88) - Conds:RS_115._col0=RS_116._col18(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col39","_col45","_col46","_col47","_col48","_col51"] + PartitionCols:_col37 + Merge Join Operator [MERGEJOIN_1067] (rows=1358167838 width=88) + Conds:RS_115._col0=RS_116._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_115] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1054] - <-Reducer 21 [SIMPLE_EDGE] + Please refer to the previous Merge Join Operator [MERGEJOIN_1056] + <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_116] - PartitionCols:_col18 + PartitionCols:_col16 Select Operator [SEL_96] (rows=1234698008 width=88) - Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col18","_col19","_col25","_col26","_col27","_col28","_col31"] - Merge Join Operator [MERGEJOIN_1064] (rows=1234698008 width=88) - Conds:RS_93._col13=RS_1114._col0(Inner),Output:["_col10","_col11","_col17","_col18","_col19","_col20","_col23","_col28","_col29","_col31","_col32","_col33","_col34"] - <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1114] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1112] - <-Reducer 20 [SIMPLE_EDGE] + Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] + Merge Join Operator [MERGEJOIN_1066] (rows=1234698008 width=88) + Conds:RS_93._col5, _col12=RS_1187._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 64 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1187] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_93] - PartitionCols:_col13 - Merge Join Operator [MERGEJOIN_1063] (rows=1122452711 width=88) - Conds:RS_90._col14=RS_1218._col0(Inner),Output:["_col10","_col11","_col13","_col17","_col18","_col19","_col20","_col23","_col28","_col29"] - <-Map 66 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1218] + PartitionCols:_col5, _col12 + Merge Join Operator [MERGEJOIN_1065] (rows=1122452711 width=88) + Conds:RS_90._col9=RS_1116._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1116] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1217] - <-Reducer 19 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_1114] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_90] - PartitionCols:_col14 - Merge Join Operator [MERGEJOIN_1062] (rows=1020411534 width=88) - Conds:RS_87._col9=RS_1194._col0(Inner),Output:["_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] - <-Reducer 56 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1194] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_1064] (rows=1020411534 width=88) + Conds:RS_87._col10=RS_1218._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] + <-Map 61 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1218] PartitionCols:_col0 - Select Operator [SEL_1193] (rows=52798137 width=135) - Output:["_col0"] - Filter Operator [FIL_1192] (rows=52798137 width=135) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1191] (rows=158394413 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 55 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Group By Operator [GBY_70] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 - Select Operator [SEL_68] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_1060] (rows=316788826 width=135) - Conds:RS_1190._col0, _col1=RS_1170._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] - <-Map 58 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1170] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1169] - <-Map 54 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1190] - PartitionCols:_col0, _col1 - Select Operator [SEL_1189] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1188] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_50_store_returns_sr_item_sk_min) AND DynamicValue(RS_50_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_50_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_66_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_66_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_66_catalog_returns_cr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_66_catalog_returns_cr_order_number_min) AND DynamicValue(RS_66_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_66_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_59] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 42 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1145] - Please refer to the previous Group By Operator [GBY_1143] - <-Reducer 47 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1165] - Group By Operator [GBY_1163] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1159] - Group By Operator [GBY_1155] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1150] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1148] - <-Reducer 59 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1185] - Group By Operator [GBY_1184] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1180] - Group By Operator [GBY_1176] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1171] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1169] - <-Reducer 60 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1187] - Group By Operator [GBY_1186] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1181] - Group By Operator [GBY_1177] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1172] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1169] - <-Reducer 18 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_1217] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_87] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1061] (rows=927646829 width=88) - Conds:RS_84._col0=RS_85._col9(Inner),Output:["_col9","_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_84] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_1063] (rows=927646829 width=88) + Conds:RS_84._col5=RS_1210._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 51 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1210] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1051] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_85] - PartitionCols:_col9 - Select Operator [SEL_58] (rows=843315281 width=88) - Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col14","_col15","_col16","_col17","_col20"] - Merge Join Operator [MERGEJOIN_1059] (rows=843315281 width=88) - Conds:RS_55._col7=RS_1204._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col9","_col10","_col11","_col12","_col15"] - <-Map 51 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1204] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1203] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1058] (rows=766650239 width=88) - Conds:RS_52._col0=RS_1097._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1097] - PartitionCols:_col0 - Select Operator [SEL_1093] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_1090] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 40 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Select Operator [SEL_1209] (rows=52798137 width=135) + Output:["_col0"] + Filter Operator [FIL_1208] (rows=52798137 width=135) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1207] (rows=158394413 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 50 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Group By Operator [GBY_64] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_62] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_1061] (rows=316788826 width=135) + Conds:RS_1206._col0, _col1=RS_1167._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 53 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1167] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1166] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1206] + PartitionCols:_col0, _col1 + Select Operator [SEL_1205] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1204] (rows=287989836 width=135) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_60_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_60_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_60_catalog_returns_cr_order_number_min) AND DynamicValue(RS_60_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_60_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_53] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1147] + Please refer to the previous Group By Operator [GBY_1145] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1203] + Group By Operator [GBY_1201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1197] + Group By Operator [GBY_1193] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1188] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 54 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1182] + Group By Operator [GBY_1181] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1177] + Group By Operator [GBY_1173] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1168] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1166] + <-Reducer 55 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1184] + Group By Operator [GBY_1183] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1178] + Group By Operator [GBY_1174] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1169] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1166] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1062] (rows=843315281 width=88) + Conds:RS_81._col0=RS_82._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_81] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col5 + Select Operator [SEL_52] (rows=766650239 width=88) + Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] + Merge Join Operator [MERGEJOIN_1060] (rows=766650239 width=88) + Conds:RS_49._col7=RS_1153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 46 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1153] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1057] (rows=696954748 width=88) - Conds:RS_49._col1, _col8=RS_1149._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] - <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1149] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1148] - <-Reducer 39 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col1, _col8 - Merge Join Operator [MERGEJOIN_1056] (rows=633595212 width=88) - Conds:RS_1234._col1=RS_1135._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 41 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1135] + Please refer to the previous Select Operator [SEL_1152] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_1059] (rows=696954748 width=88) + Conds:RS_46._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1099] + PartitionCols:_col0 + Select Operator [SEL_1095] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_1092] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Reducer 41 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1058] (rows=633595212 width=88) + Conds:RS_1236._col1=RS_1137._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 42 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1137] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1134] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1234] + Please refer to the previous Select Operator [SEL_1136] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1236] PartitionCols:_col1 - Select Operator [SEL_1233] (rows=575995635 width=88) + Select Operator [SEL_1235] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1232] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_94_ad1_ca_address_sk_min) AND DynamicValue(RS_94_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_94_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_119_cd1_cd_demo_sk_min) AND DynamicValue(RS_119_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_119_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_84_hd1_hd_demo_sk_min) AND DynamicValue(RS_84_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_84_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_50_store_returns_sr_item_sk_min) AND DynamicValue(RS_50_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_50_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_88_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_88_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_88_catalog_sales_cs_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_56_promotion_p_promo_sk_min) AND DynamicValue(RS_56_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_56_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_53_d1_d_date_sk_min) AND DynamicValue(RS_53_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_53_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_91_store_s_store_sk_min) AND DynamicValue(RS_91_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_91_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_50_store_returns_sr_ticket_number_min) AND DynamicValue(RS_50_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_50_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + Filter Operator [FIL_1234] (rows=575995635 width=88) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_91_ad1_ca_address_sk_min) AND DynamicValue(RS_91_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_91_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_119_cd1_cd_demo_sk_min) AND DynamicValue(RS_119_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_119_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_81_hd1_hd_demo_sk_min) AND DynamicValue(RS_81_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_81_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_85_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_85_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_85_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_50_promotion_p_promo_sk_min) AND DynamicValue(RS_50_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_50_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_47_d1_d_date_sk_min) AND DynamicValue(RS_47_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_47_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_88_store_s_store_sk_min) AND DynamicValue(RS_88_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_88_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_94_store_returns_sr_ticket_number_min) AND DynamicValue(RS_94_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_94_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_31] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 42 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1144] - Please refer to the previous Group By Operator [GBY_1143] - <-Reducer 47 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1164] - Please refer to the previous Group By Operator [GBY_1163] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1201] - Group By Operator [GBY_1200] (rows=1 width=12) + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1146] + Please refer to the previous Group By Operator [GBY_1145] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1202] + Please refer to the previous Group By Operator [GBY_1201] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1150] + Group By Operator [GBY_1149] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1103] - Group By Operator [GBY_1101] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1105] + Group By Operator [GBY_1103] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1098] (rows=36524 width=1119) + Select Operator [SEL_1100] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1093] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1215] - Group By Operator [GBY_1214] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1095] + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1164] + Group By Operator [GBY_1163] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_695] - Group By Operator [GBY_694] (rows=1 width=12) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_674] + Group By Operator [GBY_673] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_693] (rows=7920 width=107) + Select Operator [SEL_672] (rows=7920 width=107) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1051] - <-Reducer 36 [BROADCAST_EDGE] vectorized + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] + <-Reducer 38 [BROADCAST_EDGE] vectorized BROADCAST [RS_1229] Group By Operator [GBY_1228] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1120] - Group By Operator [GBY_1118] (rows=1 width=12) + <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1122] + Group By Operator [GBY_1120] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1115] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1112] - <-Reducer 48 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1167] - Group By Operator [GBY_1166] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1160] - Group By Operator [GBY_1156] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1151] (rows=57591150 width=77) + Select Operator [SEL_1117] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1148] - <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1213] - Group By Operator [GBY_1212] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1114] + <-Reducer 47 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1162] + Group By Operator [GBY_1161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1210] - Group By Operator [GBY_1208] (rows=1 width=12) + <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1159] + Group By Operator [GBY_1157] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1205] (rows=2300 width=1179) + Select Operator [SEL_1154] (rows=2300 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1203] - <-Reducer 57 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1199] - Group By Operator [GBY_1198] (rows=1 width=228) + Please refer to the previous Select Operator [SEL_1152] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1215] + Group By Operator [GBY_1214] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] - <-Reducer 56 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1197] - Group By Operator [GBY_1196] (rows=1 width=228) + <-Reducer 51 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1213] + Group By Operator [GBY_1212] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] - Select Operator [SEL_1195] (rows=52798137 width=135) + Select Operator [SEL_1211] (rows=52798137 width=135) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1193] - <-Reducer 67 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_1209] + <-Reducer 62 [BROADCAST_EDGE] vectorized BROADCAST [RS_1227] Group By Operator [GBY_1226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 66 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1224] Group By Operator [GBY_1222] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_1219] (rows=1704 width=1910) Output:["_col0"] Please refer to the previous Select Operator [SEL_1217] - <-Reducer 70 [BROADCAST_EDGE] vectorized + <-Reducer 66 [BROADCAST_EDGE] vectorized BROADCAST [RS_1231] Group By Operator [GBY_1230] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1198] + Group By Operator [GBY_1194] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1189] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 70 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1233] + Group By Operator [GBY_1232] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1131] - Group By Operator [GBY_1129] (rows=1 width=12) + SHUFFLE [RS_1133] + Group By Operator [GBY_1131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1125] (rows=1861800 width=385) + Select Operator [SEL_1127] (rows=1861800 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1123] + Please refer to the previous Select Operator [SEL_1125] diff --git a/ql/src/test/results/clientpositive/perf/tez/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/query72.q.out index 11573fb05d..3fe6d21909 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query72.q.out @@ -81,263 +81,263 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Reducer 10 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) -Reducer 10 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 14 <- Map 20 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 22 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 25 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 11 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 27 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 28 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Map 10 <- Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 12 <- Map 19 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 24 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 26 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 28 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 29 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 vectorized - File Output Operator [FS_312] - Limit [LIM_311] (rows=100 width=135) + Reducer 7 vectorized + File Output Operator [FS_315] + Limit [LIM_314] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_310] (rows=37725837 width=135) + Select Operator [SEL_313] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - Group By Operator [GBY_308] (rows=37725837 width=135) + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + Group By Operator [GBY_311] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_68] (rows=75451675 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col3)","count(_col4)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_66] (rows=75451675 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_248] (rows=75451675 width=135) - Conds:RS_251._col0, _col1=RS_64._col4, _col6(Right Outer),Output:["_col15","_col17","_col24","_col30"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] + Merge Join Operator [MERGEJOIN_251] (rows=75451675 width=135) + Conds:RS_63._col4, _col6=RS_310._col0, _col1(Left Outer),Output:["_col13","_col15","_col22","_col28"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] PartitionCols:_col0, _col1 - Select Operator [SEL_250] (rows=28798881 width=106) + Select Operator [SEL_309] (rows=28798881 width=106) Output:["_col0","_col1"] - Filter Operator [FIL_249] (rows=28798881 width=106) + Filter Operator [FIL_308] (rows=28798881 width=106) predicate:cr_item_sk is not null - TableScan [TS_0] (rows=28798881 width=106) + TableScan [TS_60] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_64] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col4, _col6 - Select Operator [SEL_62] (rows=68592431 width=135) + Select Operator [SEL_59] (rows=68592431 width=135) Output:["_col4","_col6","_col13","_col15","_col22","_col28"] - Filter Operator [FIL_61] (rows=68592431 width=135) - predicate:(UDFToDouble(_col28) > (UDFToDouble(_col17) + 5.0D)) - Merge Join Operator [MERGEJOIN_247] (rows=205777295 width=135) - Conds:RS_58._col9=RS_294._col0(Inner),Output:["_col5","_col7","_col12","_col14","_col17","_col18","_col24","_col28"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] - PartitionCols:_col0 - Select Operator [SEL_293] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_292] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_45] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_246] (rows=187070265 width=135) - Conds:RS_55._col0, _col18=RS_307._col0, _col1(Inner),Output:["_col5","_col7","_col9","_col12","_col14","_col17","_col18","_col24"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] - PartitionCols:_col0, _col1 - Select Operator [SEL_306] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_305] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_42] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col0, _col18 - Filter Operator [FIL_54] (rows=170063874 width=135) - predicate:(_col3 < _col15) - Merge Join Operator [MERGEJOIN_245] (rows=510191624 width=135) - Conds:RS_51._col1=RS_52._col6(Inner),Output:["_col0","_col3","_col5","_col7","_col9","_col12","_col14","_col15","_col17","_col18","_col24"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_239] (rows=41342400 width=15) - Conds:RS_254._col2=RS_257._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] - PartitionCols:_col0 - Select Operator [SEL_256] (rows=27 width=1029) - Output:["_col0","_col1"] - Filter Operator [FIL_255] (rows=27 width=1029) - predicate:w_warehouse_sk is not null - TableScan [TS_6] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - PartitionCols:_col2 - Select Operator [SEL_253] (rows=37584000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_252] (rows=37584000 width=15) - predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) - TableScan [TS_3] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Reducer 17 [ONE_TO_ONE_EDGE] - FORWARD [RS_52] - PartitionCols:_col6 - Select Operator [SEL_41] (rows=463810558 width=135) - Output:["_col1","_col3","_col6","_col8","_col9","_col11","_col12","_col18"] - Merge Join Operator [MERGEJOIN_244] (rows=463810558 width=135) + Merge Join Operator [MERGEJOIN_250] (rows=68592431 width=135) + Conds:RS_56._col0, _col20=RS_307._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col20","_col26"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + PartitionCols:_col0, _col1 + Select Operator [SEL_306] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_305] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_week_seq is not null) + TableScan [TS_46] (rows=73049 width=1119) + default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0, _col20 + Filter Operator [FIL_55] (rows=62356755 width=135) + predicate:(_col3 < _col17) + Merge Join Operator [MERGEJOIN_249] (rows=187070265 width=135) + Conds:RS_52._col1=RS_53._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col20","_col26"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_52] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_242] (rows=41342400 width=15) + Conds:RS_254._col2=RS_257._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] + PartitionCols:_col2 + Select Operator [SEL_253] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_252] (rows=37584000 width=15) + predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] + PartitionCols:_col0 + Select Operator [SEL_256] (rows=27 width=1029) + Output:["_col0","_col1"] + Filter Operator [FIL_255] (rows=27 width=1029) + predicate:w_warehouse_sk is not null + TableScan [TS_3] (rows=27 width=1029) + default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col8 + Select Operator [SEL_45] (rows=170063874 width=135) + Output:["_col3","_col8","_col10","_col11","_col14","_col20"] + Filter Operator [FIL_44] (rows=170063874 width=135) + predicate:(UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0D)) + Merge Join Operator [MERGEJOIN_248] (rows=510191624 width=135) + Conds:RS_41._col1=RS_294._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col16","_col18","_col20"] + <-Map 26 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_294] + PartitionCols:_col0 + Select Operator [SEL_293] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_292] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_23] (rows=73049 width=1119) + default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_247] (rows=463810558 width=135) Conds:RS_38._col4=RS_284._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16","_col18"] - <-Map 25 [SIMPLE_EDGE] vectorized + <-Map 24 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_284] PartitionCols:_col0 Select Operator [SEL_283] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_282] (rows=462000 width=1436) predicate:i_item_sk is not null - TableScan [TS_23] (rows=462000 width=1436) + TableScan [TS_20] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_243] (rows=421645953 width=135) + Merge Join Operator [MERGEJOIN_246] (rows=421645953 width=135) Conds:RS_35._col5=RS_304._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16"] - <-Map 24 [SIMPLE_EDGE] vectorized + <-Map 23 [SIMPLE_EDGE] vectorized SHUFFLE [RS_304] PartitionCols:_col0 Select Operator [SEL_303] (rows=2300 width=1179) Output:["_col0"] - TableScan [TS_21] (rows=2300 width=1179) + TableScan [TS_18] (rows=2300 width=1179) default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_242] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_245] (rows=383314495 width=135) Conds:RS_32._col3=RS_276._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 22 [SIMPLE_EDGE] vectorized + <-Map 21 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_276] PartitionCols:_col0 Select Operator [SEL_275] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_274] (rows=3600 width=107) predicate:((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) - TableScan [TS_18] (rows=7200 width=107) + TableScan [TS_15] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 14 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_241] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_244] (rows=348467716 width=135) Conds:RS_29._col2=RS_268._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 20 [SIMPLE_EDGE] vectorized + <-Map 19 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_268] PartitionCols:_col0 Select Operator [SEL_267] (rows=930900 width=385) Output:["_col0"] Filter Operator [FIL_266] (rows=930900 width=385) predicate:((cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=385) + TableScan [TS_12] (rows=1861800 width=385) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_240] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_243] (rows=316788826 width=135) Conds:RS_302._col0=RS_260._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 18 [SIMPLE_EDGE] vectorized + <-Map 17 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_260] PartitionCols:_col0 Select Operator [SEL_259] (rows=36524 width=1119) Output:["_col0","_col1","_col2"] Filter Operator [FIL_258] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) - TableScan [TS_12] (rows=73049 width=1119) + TableScan [TS_9] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_week_seq","d_year"] - <-Map 12 [SIMPLE_EDGE] vectorized + <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_302] PartitionCols:_col0 Select Operator [SEL_301] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Filter Operator [FIL_300] (rows=287989836 width=135) - predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_30_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_30_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_30_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_33_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_33_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_33_household_demographics_hd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_51_inventory_inv_item_sk_min) AND DynamicValue(RS_51_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_51_inventory_inv_item_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_59_d3_d_date_sk_min) AND DynamicValue(RS_59_d3_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_59_d3_d_date_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_9] (rows=287989836 width=135) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_30_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_30_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_30_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_33_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_33_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_33_household_demographics_hd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_52_inventory_inv_item_sk_min) AND DynamicValue(RS_52_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_52_inventory_inv_item_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_291] - Group By Operator [GBY_290] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=41342400)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_185] - Group By Operator [GBY_184] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=41342400)"] - Select Operator [SEL_183] (rows=41342400 width=15) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_239] - <-Reducer 19 [BROADCAST_EDGE] vectorized + <-Reducer 18 [BROADCAST_EDGE] vectorized BROADCAST [RS_265] Group By Operator [GBY_264] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_263] Group By Operator [GBY_262] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_261] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_259] - <-Reducer 21 [BROADCAST_EDGE] vectorized + <-Reducer 20 [BROADCAST_EDGE] vectorized BROADCAST [RS_273] Group By Operator [GBY_272] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_271] Group By Operator [GBY_270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_269] (rows=930900 width=385) Output:["_col0"] Please refer to the previous Select Operator [SEL_267] - <-Reducer 23 [BROADCAST_EDGE] vectorized + <-Reducer 22 [BROADCAST_EDGE] vectorized BROADCAST [RS_281] Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_279] Group By Operator [GBY_278] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_277] (rows=3600 width=107) Output:["_col0"] Please refer to the previous Select Operator [SEL_275] - <-Reducer 26 [BROADCAST_EDGE] vectorized + <-Reducer 25 [BROADCAST_EDGE] vectorized BROADCAST [RS_289] Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_287] Group By Operator [GBY_286] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_285] (rows=462000 width=1436) Output:["_col0"] Please refer to the previous Select Operator [SEL_283] - <-Reducer 29 [BROADCAST_EDGE] vectorized + <-Reducer 27 [BROADCAST_EDGE] vectorized BROADCAST [RS_299] Group By Operator [GBY_298] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] + <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_297] Group By Operator [GBY_296] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_295] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_293] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_291] + Group By Operator [GBY_290] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=41342400)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_174] + Group By Operator [GBY_173] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=41342400)"] + Select Operator [SEL_172] (rows=41342400 width=15) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_242] diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index 5e2af5a0c6..396e9a93a0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,16 +183,16 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 18 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -204,144 +204,142 @@ Stage-0 File Output Operator [FS_244] Limit [LIM_243] (rows=100 width=1014) Number of rows:100 - Select Operator [SEL_242] (rows=3666666 width=1014) + Select Operator [SEL_242] (rows=4436665 width=1014) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_241] - Select Operator [SEL_240] (rows=3666666 width=1014) + Select Operator [SEL_240] (rows=4436665 width=1014) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_239] (rows=3666666 width=1014) + Group By Operator [GBY_239] (rows=4436665 width=1014) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=7333332 width=1014) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col19 - Select Operator [SEL_47] (rows=7333332 width=1014) - Output:["_col6","_col7","_col12","_col19"] - Filter Operator [FIL_46] (rows=7333332 width=1014) - predicate:((((_col27 = 'KY') or (_col27 = 'GA') or (_col27 = 'NM')) and _col14 BETWEEN 100 AND 200) or (((_col27 = 'MT') or (_col27 = 'OR') or (_col27 = 'IN')) and _col14 BETWEEN 150 AND 300) or (((_col27 = 'WI') or (_col27 = 'MO') or (_col27 = 'WV')) and _col14 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_206] (rows=22000000 width=1014) - Conds:RS_43._col2=RS_238._col0(Inner),Output:["_col6","_col7","_col12","_col14","_col19","_col27"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] + Group By Operator [GBY_48] (rows=8873331 width=1014) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","count(_col4)","sum(_col16)","count(_col16)","sum(_col15)","count(_col15)"],keys:_col28 + Merge Join Operator [MERGEJOIN_206] (rows=8873331 width=1014) + Conds:RS_44._col13=RS_238._col0(Inner),Output:["_col4","_col15","_col16","_col28"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col0 + Select Operator [SEL_237] (rows=72 width=200) + Output:["_col0","_col1"] + Filter Operator [FIL_236] (rows=72 width=200) + predicate:r_reason_sk is not null + TableScan [TS_21] (rows=72 width=200) + default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col13 + Merge Join Operator [MERGEJOIN_205] (rows=8066665 width=1014) + Conds:RS_41._col2=RS_217._col0(Inner),Output:["_col4","_col13","_col15","_col16"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_237] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_236] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_21] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_43] + Select Operator [SEL_216] (rows=4602 width=585) + Output:["_col0"] + Filter Operator [FIL_215] (rows=4602 width=585) + predicate:wp_web_page_sk is not null + TableScan [TS_18] (rows=4602 width=585) + default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_205] (rows=7086373 width=135) - Conds:RS_40._col3, _col21, _col22=RS_234._col0, _col1, _col2(Inner),Output:["_col2","_col6","_col7","_col12","_col14","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_233] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_232] (rows=1861800 width=385) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_18] (rows=1861800 width=385) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col3, _col21, _col22 - Filter Operator [FIL_39] (rows=6442158 width=135) - predicate:(((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) - Merge Join Operator [MERGEJOIN_204] (rows=77305913 width=135) - Conds:RS_36._col1=RS_235._col0(Inner),Output:["_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19","_col21","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_233] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_203] (rows=70278102 width=135) - Conds:RS_33._col4=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0 - Select Operator [SEL_230] (rows=72 width=200) - Output:["_col0","_col1"] - Filter Operator [FIL_229] (rows=72 width=200) - predicate:r_reason_sk is not null - TableScan [TS_12] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135) - Conds:RS_30._col10=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col12","_col13","_col14"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - PartitionCols:_col0 - Select Operator [SEL_219] (rows=4602 width=585) - Output:["_col0"] - Filter Operator [FIL_218] (rows=4602 width=585) - predicate:wp_web_page_sk is not null - TableScan [TS_9] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135) - Conds:RS_27._col8=RS_212._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col10","_col12","_col13","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_210] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135) - Conds:RS_209._col0, _col5=RS_228._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0, _col5 - Select Operator [SEL_208] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_207] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_0] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - PartitionCols:_col1, _col3 - Select Operator [SEL_227] (rows=48000888 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_226] (rows=48000888 width=135) - predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_31_web_page_wp_web_page_sk_min) AND DynamicValue(RS_31_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_31_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_3] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - Group By Operator [GBY_214] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_211] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Group By Operator [GBY_222] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=4602 width=585) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] + Filter Operator [FIL_40] (rows=7333332 width=1014) + predicate:((((_col24 = 'KY') or (_col24 = 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_204] (rows=22000000 width=1014) + Conds:RS_37._col11=RS_235._col0(Inner),Output:["_col2","_col4","_col6","_col13","_col15","_col16","_col24"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + PartitionCols:_col0 + Select Operator [SEL_234] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_233] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_15] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col11 + Merge Join Operator [MERGEJOIN_203] (rows=5856506 width=135) + Conds:RS_34._col12, _col18, _col19=RS_231._col0, _col1, _col2(Inner),Output:["_col2","_col4","_col6","_col11","_col13","_col15","_col16"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_230] (rows=1861800 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_229] (rows=1861800 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_12] (rows=1861800 width=385) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col12, _col18, _col19 + Filter Operator [FIL_33] (rows=5324097 width=135) + predicate:(((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200)) + Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135) + Conds:RS_30._col10=RS_232._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col11","_col12","_col13","_col15","_col16","_col18","_col19"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_230] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135) + Conds:RS_27._col1, _col3=RS_228._col0, _col5(Inner),Output:["_col2","_col4","_col5","_col6","_col10","_col11","_col12","_col13","_col15","_col16"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col0, _col5 + Select Operator [SEL_227] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_226] (rows=14398467 width=92) + predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) + TableScan [TS_6] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135) + Conds:RS_225._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_207] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_224] (rows=48000888 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_223] (rows=48000888 width=135) + predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_42_web_page_wp_web_page_sk_min) AND DynamicValue(RS_42_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_42_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_210] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_208] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_218] (rows=4602 width=585) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_216] diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index ba80eb5223..781acd6da3 100644 --- a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -429,9 +429,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -448,68 +448,70 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -517,53 +519,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col3 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -593,9 +595,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -612,68 +614,70 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -681,53 +685,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col3 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index 6b481bcddc..095c621865 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -166,32 +166,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: p2 @@ -205,31 +205,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 7 Map Operator Tree: @@ -257,33 +257,38 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) + key expressions: _col9 (type: int), _col10 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + 0 _col9 (type: int), _col10 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) - Reducer 4 + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -291,15 +296,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index b0fd6c9bbf..24723d8ae9 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -170,32 +170,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: p2 @@ -209,31 +209,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 7 Map Operator Tree: @@ -261,33 +261,38 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) + key expressions: _col9 (type: int), _col10 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + 0 _col9 (type: int), _col10 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) - Reducer 4 + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -295,15 +300,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out index a074c061d2..7f206aa013 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out @@ -221,26 +221,6 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized - Map 5 Map Operator Tree: TableScan alias: p2 @@ -254,13 +234,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan alias: p3 @@ -280,6 +260,26 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -306,49 +306,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col18 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col18 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out index d687eb7306..3d1003ebe6 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out @@ -225,26 +225,6 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized - Map 5 Map Operator Tree: TableScan alias: p2 @@ -258,13 +238,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan alias: p3 @@ -284,6 +264,26 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -310,49 +310,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col18 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col18 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator