diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 8a561e5771..348e07bbf8 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1628,6 +1628,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal + " expressed as multiple of Local FS read cost"), HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true, "Toggle display of CBO warnings like missing column stats"), + HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS("hive.cbo.stats.correlated.multi.key.joins", false, + "When CBO estimates output rows for a join involving multiple columns, the default behavior assumes" + + "the columns are independent. Setting this flag to true will cause the estimator to assume" + + "the columns are correlated."), AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"), SEMIJOIN_CONVERSION("hive.optimize.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"), HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ab5848a376..5cd6a5de3b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -412,7 +412,7 @@ private static RelOptPlanner createPlanner( CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(), Boolean.FALSE.toString()); CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties); - boolean isCorrelatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_CORRELATED_MULTI_KEY_JOINS); + boolean isCorrelatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS); boolean heuristicMaterializationStrategy = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY).equals("heuristic"); HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig, diff --git a/ql/src/test/results/clientpositive/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/join_alt_syntax.q.out index 57a9109bf7..20a0ebc53f 100644 --- a/ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -364,9 +364,9 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -384,35 +384,33 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -427,59 +425,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -501,10 +451,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -512,6 +462,54 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) + TableScan + alias: p4 + filterExpr: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col6 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -530,9 +528,9 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -550,35 +548,33 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -593,59 +589,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -667,10 +615,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -678,6 +626,54 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) + TableScan + alias: p4 + filterExpr: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col6 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out index 741b11f371..e1a4c99bf6 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out @@ -146,31 +146,31 @@ from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p2 filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) @@ -183,18 +183,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -209,59 +209,54 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col9 (type: int), _col10 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -286,14 +281,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out index f35d397f2a..b12f0b4151 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out @@ -150,31 +150,31 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p2 filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) @@ -187,18 +187,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -213,59 +213,54 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col9 (type: int), _col10 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -290,14 +285,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out index 3d6af2d769..536390b689 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out @@ -186,48 +186,48 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p2 - filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -240,37 +240,37 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) + 0 _col10 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -283,11 +283,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col18 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -310,21 +310,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out index a7dfc2c422..b9327e38bc 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out @@ -190,48 +190,48 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p2 - filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -244,37 +244,37 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) + 0 _col10 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -287,11 +287,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col18 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -314,21 +314,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out index d0fa81d913..21acc5c467 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out @@ -96,7 +96,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 15 Map Operator Tree: TableScan alias: store @@ -120,56 +120,37 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) - Reducer 5 <- Reducer 4 (GROUP, 582) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 438), Reducer 9 (PARTITION-LEVEL SORT, 438) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 481), Reducer 10 (PARTITION-LEVEL SORT, 481) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) + Reducer 4 <- Map 16 (PARTITION-LEVEL SORT, 645), Reducer 3 (PARTITION-LEVEL SORT, 645) + Reducer 5 <- Reducer 4 (GROUP, 704) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized Map 12 - Map Operator Tree: - TableScan - alias: d3 - filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 13 Map Operator Tree: TableScan alias: store_returns @@ -183,51 +164,51 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + value expressions: _col0 (type: int), _col4 (type: int) Execution mode: vectorized - Map 15 + Map 13 Map Operator Tree: TableScan - alias: d2 - filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + alias: d1 + filterExpr: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 14 Map Operator Tree: TableScan - alias: d1 - filterExpr: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) + alias: d2 + filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 16 Map Operator Tree: TableScan alias: item @@ -247,25 +228,44 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 9 + Map 7 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: d3 + filterExpr: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int) Execution mode: vectorized Reducer 10 Reduce Operator Tree: @@ -275,31 +275,47 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) Reducer 11 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int), _col1 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col7, _col8, _col9, _col10 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int) - sort order: +++ - Map-reduce partition columns: _col7 (type: int), _col8 (type: int), _col9 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col10 (type: int) - Reducer 14 + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col7, _col8, _col10, _col16 + input vertices: + 1 Map 15 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col16 (type: string), _col1 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + outputColumnNames: _col1, _col5, _col9, _col11, _col12, _col14 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int), _col11 (type: int) + sort order: ++ + Map-reduce partition columns: _col12 (type: int), _col11 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: int), _col9 (type: int), _col14 (type: int) + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -307,85 +323,57 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col9, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int) + 1 _col12 (type: int), _col11 (type: int) + outputColumnNames: _col3, _col7, _col11, _col15, _col20 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int), _col9 (type: string), _col10 (type: string) + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col7 (type: string), _col15 (type: int), _col20 (type: int) Reducer 4 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col7 (type: int), _col8 (type: int), _col9 (type: int) - outputColumnNames: _col3, _col5, _col9, _col10, _col14, _col21 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col9, _col10, _col14, _col21, _col25 - input vertices: - 1 Map 16 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col9 (type: string), _col10 (type: string), _col25 (type: string), _col5 (type: int), _col21 (type: int), _col14 (type: int), UDFToDouble(_col5) (type: double), (UDFToDouble(_col5) * UDFToDouble(_col5)) (type: double), UDFToDouble(_col21) (type: double), (UDFToDouble(_col21) * UDFToDouble(_col21)) (type: double), UDFToDouble(_col14) (type: double), (UDFToDouble(_col14) * UDFToDouble(_col14)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col3), sum(_col3), sum(_col7), sum(_col6), count(_col4), sum(_col4), sum(_col9), sum(_col8), count(_col5), sum(_col5), sum(_col11), sum(_col10) - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: double), _col14 (type: double) + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col7, _col15, _col20, _col24, _col25 + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col24 (type: string), _col25 (type: string), _col7 (type: string), _col15 (type: int), _col20 (type: int), _col3 (type: int), UDFToDouble(_col15) (type: double), (UDFToDouble(_col15) * UDFToDouble(_col15)) (type: double), UDFToDouble(_col20) (type: double), (UDFToDouble(_col20) * UDFToDouble(_col20)) (type: double), UDFToDouble(_col3) (type: double), (UDFToDouble(_col3) * UDFToDouble(_col3)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col3), sum(_col3), sum(_col7), sum(_col6), count(_col4), sum(_col4), sum(_col9), sum(_col8), count(_col5), sum(_col5), sum(_col11), sum(_col10) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: double), _col14 (type: double) Reducer 5 Execution mode: vectorized Reduce Operator Tree: @@ -394,15 +382,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint), (UDFToDouble(_col4) / _col3) (type: double), power(((_col5 - ((_col6 * _col6) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (power(((_col5 - ((_col6 * _col6) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col4) / _col3)) (type: double), _col7 (type: bigint), (UDFToDouble(_col8) / _col7) (type: double), power(((_col9 - ((_col10 * _col10) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (power(((_col9 - ((_col10 * _col10) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) / (UDFToDouble(_col8) / _col7)) (type: double), _col11 (type: bigint), (UDFToDouble(_col12) / _col11) (type: double), (power(((_col13 - ((_col14 * _col14) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END), 0.5) / (UDFToDouble(_col12) / _col11)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Reducer 6 @@ -411,7 +399,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: bigint), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col10 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -422,6 +410,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index a34d3e8862..aff58cc018 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -109,7 +109,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 19 + Map 20 Map Operator Tree: TableScan alias: store @@ -133,9 +133,9 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 975), Map 20 (PARTITION-LEVEL SORT, 975) - Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 486), Reducer 13 (PARTITION-LEVEL SORT, 486) - Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 564), Reducer 14 (PARTITION-LEVEL SORT, 564) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 19 (PARTITION-LEVEL SORT, 432) + Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009) + Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 534), Reducer 14 (PARTITION-LEVEL SORT, 534) Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 899), Reducer 15 (PARTITION-LEVEL SORT, 899) Reducer 17 <- Reducer 16 (GROUP, 640) Reducer 18 <- Reducer 17 (GROUP, 1) @@ -154,26 +154,33 @@ STAGE PLANS: expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9 - input vertices: - 1 Map 19 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 20 + Map 19 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 21 Map Operator Tree: TableScan alias: customer @@ -193,7 +200,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 21 + Map 22 Map Operator Tree: TableScan alias: item @@ -213,25 +220,6 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map 22 - Map Operator Tree: - TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Map 23 Map Operator Tree: TableScan @@ -253,73 +241,85 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized Reducer 13 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + 0 _col0 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4, _col8, _col10, _col11 + input vertices: + 1 Map 20 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col8 (type: string), _col10 (type: string), _col11 (type: string) Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 + outputColumnNames: _col0, _col4, _col8, _col10, _col11, _col13, _col14, _col15 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) + value expressions: _col4 (type: decimal(7,2)), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col8, _col10, _col11, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: string), _col13 (type: string) + key expressions: _col11 (type: string), _col15 (type: string) sort order: ++ - Map-reduce partition columns: _col9 (type: string), _col13 (type: string) + Map-reduce partition columns: _col11 (type: string), _col15 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) + value expressions: _col4 (type: decimal(7,2)), _col8 (type: string), _col10 (type: string), _col13 (type: string), _col14 (type: string), _col17 (type: decimal(7,2)), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: int) Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: string), _col13 (type: string) + 0 _col11 (type: string), _col15 (type: string) 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col22 + outputColumnNames: _col4, _col8, _col10, _col13, _col14, _col17, _col18, _col19, _col20, _col21, _col22 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col11 (type: string), _col12 (type: string), _col6 (type: string), _col8 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int), _col22 (type: string) + keys: _col22 (type: string), _col17 (type: decimal(7,2)), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: int), _col13 (type: string), _col14 (type: string), _col8 (type: string), _col10 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) + key expressions: _col0 (type: string), _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col10 (type: decimal(17,2)) Reducer 17 @@ -327,7 +327,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: decimal(7,2)), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: int), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE @@ -367,7 +367,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: store @@ -391,9 +391,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 564), Reducer 3 (PARTITION-LEVEL SORT, 564) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899) Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640) #### A masked pattern was here #### @@ -412,30 +412,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 10 Map Operator Tree: TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized Map 11 Map Operator Tree: @@ -458,6 +459,25 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized Map 7 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: item @@ -477,27 +497,23 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -507,25 +523,25 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col9, _col10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col4, _col8, _col9, _col11, _col12 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15 + outputColumnNames: _col1, _col4, _col8, _col9, _col11, _col12, _col14, _col16, _col17 input vertices: - 1 Map 8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 1 Map 9 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string) - Reducer 3 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: string), _col11 (type: string), _col12 (type: int), _col14 (type: string), _col16 (type: string), _col17 (type: string) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -533,43 +549,27 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 + outputColumnNames: _col4, _col8, _col9, _col11, _col12, _col14, _col16, _col17, _col19, _col20, _col21 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col15 (type: string), _col19 (type: string) + key expressions: _col17 (type: string), _col21 (type: string) sort order: ++ - Map-reduce partition columns: _col15 (type: string), _col19 (type: string) + Map-reduce partition columns: _col17 (type: string), _col21 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string) + value expressions: _col4 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: string), _col11 (type: string), _col12 (type: int), _col14 (type: string), _col16 (type: string), _col19 (type: string), _col20 (type: string) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col15 (type: string), _col19 (type: string) + 0 _col17 (type: string), _col21 (type: string) 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col17, _col18, _col22 + outputColumnNames: _col4, _col8, _col9, _col11, _col12, _col14, _col16, _col19, _col20, _col22 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col17 (type: string), _col18 (type: string), _col12 (type: string), _col22 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col14 (type: string) + keys: _col19 (type: string), _col20 (type: string), _col14 (type: string), _col22 (type: string), _col8 (type: decimal(7,2)), _col9 (type: string), _col11 (type: string), _col12 (type: int), _col16 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index d15c8cee67..79cbef57a6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -102,7 +102,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 15 Map Operator Tree: TableScan alias: store @@ -126,54 +126,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 374), Reducer 9 (PARTITION-LEVEL SORT, 374) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 806), Reducer 2 (PARTITION-LEVEL SORT, 806) - Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) - Reducer 5 <- Reducer 4 (GROUP, 582) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 437), Reducer 9 (PARTITION-LEVEL SORT, 437) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 481), Reducer 10 (PARTITION-LEVEL SORT, 481) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) + Reducer 4 <- Map 16 (PARTITION-LEVEL SORT, 645), Reducer 3 (PARTITION-LEVEL SORT, 645) + Reducer 5 <- Reducer 4 (GROUP, 704) Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) - Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: d3 - filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 12 Map Operator Tree: @@ -188,12 +169,31 @@ STAGE PLANS: expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)) + Execution mode: vectorized + Map 13 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 14 Map Operator Tree: @@ -214,7 +214,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 + Map 16 Map Operator Tree: TableScan alias: item @@ -237,41 +237,41 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: d1 - filterExpr: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + alias: d3 + filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 8 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)) Execution mode: vectorized Reducer 10 Reduce Operator Tree: @@ -279,32 +279,48 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int), _col1 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col8, _col9, _col10, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) - sort order: +++ - Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) - Reducer 13 + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col10 (type: decimal(7,2)) + Reducer 11 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col6 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col7, _col8, _col10, _col18, _col19 + input vertices: + 1 Map 15 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: string), _col19 (type: string), _col1 (type: int), _col5 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col10 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col7, _col11, _col13, _col14, _col16 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col14 (type: int), _col13 (type: int) + sort order: ++ + Map-reduce partition columns: _col14 (type: int), _col13 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col7 (type: int), _col11 (type: decimal(7,2)), _col16 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -313,65 +329,53 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)) + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col8 (type: int), _col9 (type: int), _col10 (type: int) - outputColumnNames: _col1, _col3, _col5, _col12, _col20 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int) + 1 _col14 (type: int), _col13 (type: int) + outputColumnNames: _col3, _col8, _col9, _col14, _col18, _col23 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col14 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col12 (type: decimal(7,2)), _col20 (type: decimal(7,2)) + Map-reduce partition columns: _col14 (type: int) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string), _col18 (type: decimal(7,2)), _col23 (type: decimal(7,2)) Reducer 4 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col14 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col12, _col20, _col25, _col26 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col12, _col20, _col25, _col26, _col28, _col29 - input vertices: - 1 Map 16 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5), sum(_col20), sum(_col12) - keys: _col25 (type: string), _col26 (type: string), _col28 (type: string), _col29 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + outputColumnNames: _col3, _col8, _col9, _col18, _col23, _col28, _col29 + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col18), sum(_col23), sum(_col3) + keys: _col28 (type: string), _col29 (type: string), _col8 (type: string), _col9 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: @@ -380,11 +384,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 6 @@ -393,7 +397,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -410,16 +414,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index a3b0610f4a..e9d986b6c2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -100,7 +100,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 15 Map Operator Tree: TableScan alias: store @@ -124,14 +124,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) - Reducer 4 <- Reducer 3 (GROUP, 640) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 437), Reducer 9 (PARTITION-LEVEL SORT, 437) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 481), Reducer 10 (PARTITION-LEVEL SORT, 481) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) + Reducer 4 <- Map 16 (PARTITION-LEVEL SORT, 645), Reducer 3 (PARTITION-LEVEL SORT, 645) + Reducer 5 <- Reducer 4 (GROUP, 704) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) #### A masked pattern was here #### Vertices: Map 1 @@ -154,25 +154,6 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Map 12 Map Operator Tree: TableScan @@ -186,12 +167,31 @@ STAGE PLANS: expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int) + Execution mode: vectorized + Map 13 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 14 Map Operator Tree: @@ -212,7 +212,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 + Map 16 Map Operator Tree: TableScan alias: item @@ -232,7 +232,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: d3 @@ -251,7 +251,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: store_sales @@ -265,13 +265,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int) Execution mode: vectorized Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + Reducer 11 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -279,9 +295,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col6 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col10, _col11, _col13, _col18, _col19 + outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -289,36 +305,20 @@ STAGE PLANS: keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col10, _col11, _col13, _col18, _col19, _col21, _col22 + outputColumnNames: _col1, _col5, _col7, _col8, _col10, _col18, _col19 input vertices: - 1 Map 16 + 1 Map 15 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string) - outputColumnNames: _col1, _col2, _col8, _col13, _col14, _col16, _col21, _col22 + expressions: _col18 (type: string), _col19 (type: string), _col1 (type: int), _col5 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + outputColumnNames: _col1, _col2, _col7, _col11, _col13, _col14, _col16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col14 (type: int), _col13 (type: int) sort order: ++ Map-reduce partition columns: _col14 (type: int), _col13 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string) - Reducer 13 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int) + value expressions: _col1 (type: string), _col2 (type: string), _col7 (type: int), _col11 (type: int), _col16 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -343,22 +343,38 @@ STAGE PLANS: keys: 0 _col1 (type: int), _col2 (type: int) 1 _col14 (type: int), _col13 (type: int) - outputColumnNames: _col3, _col7, _col8, _col14, _col22, _col27, _col28 + outputColumnNames: _col3, _col7, _col8, _col13, _col17, _col22 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: int) + sort order: + + Map-reduce partition columns: _col13 (type: int) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col7 (type: string), _col8 (type: string), _col17 (type: int), _col22 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col13 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col7, _col8, _col17, _col22, _col27, _col28 + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col14), sum(_col22), sum(_col3) - keys: _col7 (type: string), _col8 (type: string), _col27 (type: string), _col28 (type: string) + aggregations: sum(_col17), sum(_col22), sum(_col3) + keys: _col27 (type: string), _col28 (type: string), _col7 (type: string), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1020411534 Data size: 90021001838 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -366,20 +382,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205767 Data size: 45010500919 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -390,22 +406,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int) Reducer 9 Reduce Operator Tree: Join Operator @@ -414,14 +414,14 @@ STAGE PLANS: keys: 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col1, _col3, _col5, _col10, _col11, _col13 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query50.q.out b/ql/src/test/results/clientpositive/perf/spark/query50.q.out index 0c4528fe8d..33130ed293 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query50.q.out @@ -139,7 +139,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col10 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -148,14 +148,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 36), Map 7 (PARTITION-LEVEL SORT, 36) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 436), Reducer 2 (PARTITION-LEVEL SORT, 436) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 438), Reducer 3 (PARTITION-LEVEL SORT, 438) - Reducer 5 <- Reducer 4 (GROUP, 529) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 437), Reducer 2 (PARTITION-LEVEL SORT, 437) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 481), Reducer 3 (PARTITION-LEVEL SORT, 481) + Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: store_returns @@ -169,13 +189,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + value expressions: _col0 (type: int) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: d2 @@ -194,26 +214,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) - Execution mode: vectorized Map 9 Map Operator Tree: TableScan @@ -239,32 +239,32 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col3, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col3 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col7, _col10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col5 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col7 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col7 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col10 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int) Reducer 4 Local Work: Map Reduce Local Work @@ -273,35 +273,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col7 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col7, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col3, _col5 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col7, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + outputColumnNames: _col0, _col5, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col0 - _col7) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 30) and ((_col0 - _col7) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 60) and ((_col0 - _col7) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 90) and ((_col0 - _col7) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col0 - _col7) > 120)) THEN (1) ELSE (0) END (type: int) + expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col5 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 30) and ((_col5 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 60) and ((_col5 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 90) and ((_col5 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col5 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) sort order: ++++++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) Reducer 5 @@ -312,11 +312,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) sort order: ++++++++++ - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) Reducer 6 @@ -325,7 +325,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index 241d6d8e1f..a66ad45bb6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join JOIN[84][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 4' is a cross product -Warning: Shuffle Join JOIN[115][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[145][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[83][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4, $hdt$_5, $hdt$_6]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[114][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 6' is a cross product Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[143][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -193,11 +193,11 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 23 <- Map 22 (GROUP, 2) - Reducer 24 <- Reducer 23 (GROUP, 1) + Reducer 22 <- Map 21 (GROUP, 2) + Reducer 23 <- Reducer 22 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 22 + Map 21 Map Operator Tree: TableScan alias: date_dim @@ -221,7 +221,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 23 + Reducer 22 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -240,7 +240,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 24 + Reducer 23 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -264,7 +264,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 10 Map Operator Tree: TableScan alias: store @@ -288,43 +288,23 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 654), Reducer 17 (PARTITION-LEVEL SORT, 654) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 458), Map 18 (PARTITION-LEVEL SORT, 458), Map 19 (PARTITION-LEVEL SORT, 458) - Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 505), Reducer 14 (PARTITION-LEVEL SORT, 505) - Reducer 16 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009) - Reducer 17 <- Reducer 16 (GROUP, 610) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 458), Map 16 (PARTITION-LEVEL SORT, 458), Map 17 (PARTITION-LEVEL SORT, 458) + Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 505), Reducer 12 (PARTITION-LEVEL SORT, 505) + Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009) + Reducer 15 <- Reducer 14 (GROUP, 610) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 654), Reducer 15 (PARTITION-LEVEL SORT, 654) Reducer 26 <- Map 25 (GROUP, 2) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 772), Reducer 2 (PARTITION-LEVEL SORT, 772) + Reducer 3 <- Map 20 (PARTITION-LEVEL SORT, 733), Reducer 2 (PARTITION-LEVEL SORT, 733) Reducer 31 <- Map 30 (GROUP, 2) - Reducer 4 <- Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 31 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Reducer 5 (GROUP, 1009) + Reducer 4 <- Map 24 (PARTITION-LEVEL SORT, 482), Reducer 3 (PARTITION-LEVEL SORT, 482) + Reducer 5 <- Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 31 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 7 <- Reducer 6 (GROUP, 1009) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1009) + Reducer 9 <- Reducer 8 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Execution mode: vectorized - Map 10 Map Operator Tree: TableScan alias: customer_address @@ -345,7 +325,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 12 + 1 Map 10 Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -355,7 +335,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 13 + Map 11 Map Operator Tree: TableScan alias: catalog_sales @@ -375,7 +355,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 18 + Map 16 Map Operator Tree: TableScan alias: web_sales @@ -395,7 +375,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 19 + Map 17 Map Operator Tree: TableScan alias: date_dim @@ -414,7 +394,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 20 + Map 18 Map Operator Tree: TableScan alias: item @@ -433,7 +413,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 21 + Map 19 Map Operator Tree: TableScan alias: customer @@ -453,6 +433,46 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized + Map 20 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 24 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_month_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized Map 25 Map Operator Tree: TableScan @@ -501,42 +521,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_month_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col5 - Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: int) - sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE - Reducer 14 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -552,7 +537,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Reducer 15 + Reducer 13 Reduce Operator Tree: Join Operator condition map: @@ -567,7 +552,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -587,7 +572,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE - Reducer 17 + Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -612,15 +597,14 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 1 _col1 (type: int) + outputColumnNames: _col5 + Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col5 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE Reducer 26 Execution mode: vectorized Reduce Operator Tree: @@ -641,24 +625,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col5 (type: int) - outputColumnNames: _col2, _col4, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col5 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col5, _col7, _col9 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 1 - outputColumnNames: _col2, _col4, _col10 + outputColumnNames: _col5, _col7, _col9 input vertices: - 1 Reducer 24 - Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE + 1 Reducer 23 + Statistics: Num rows: 633595212 Data size: 61598310416 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) + key expressions: _col7 (type: int) + sort order: + + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 633595212 Data size: 61598310416 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col9 (type: decimal(7,2)) Reducer 31 Execution mode: vectorized Reduce Operator Tree: @@ -672,6 +658,20 @@ STAGE PLANS: Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col9, _col12 + Statistics: Num rows: 696954748 Data size: 67758142926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 696954748 Data size: 67758142926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col9 (type: decimal(7,2)), _col12 (type: int) + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -681,12 +681,12 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col2, _col4, _col10, _col13 - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col5, _col9, _col12, _col13 + Statistics: Num rows: 6363893803988 Data size: 7746260663523866 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int) + expressions: _col5 (type: int), _col9 (type: decimal(7,2)), _col12 (type: int), _col13 (type: int) outputColumnNames: _col0, _col4, _col11, _col13 - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7746260663523866 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -696,16 +696,16 @@ STAGE PLANS: outputColumnNames: _col0, _col4, _col11, _col13 input vertices: 1 Reducer 29 - Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7803535707759758 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7803535707759758 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6363893803988 Data size: 7803535707759758 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(7,2)), _col2 (type: int), _col3 (type: int) - Reducer 5 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -734,7 +734,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -759,7 +759,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 7 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -778,7 +778,7 @@ STAGE PLANS: Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: int) - Reducer 8 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out index 37cf70413e..3292856434 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out @@ -69,7 +69,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 9 Map Operator Tree: TableScan alias: warehouse @@ -94,7 +94,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 16 Map Operator Tree: TableScan alias: household_demographics @@ -114,7 +114,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 16 + Map 17 Map Operator Tree: TableScan alias: promotion @@ -134,50 +134,57 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 338), Reducer 9 (PARTITION-LEVEL SORT, 338) - Reducer 11 <- Map 17 (PARTITION-LEVEL SORT, 452), Reducer 10 (PARTITION-LEVEL SORT, 452) - Reducer 12 <- Map 18 (PARTITION-LEVEL SORT, 492), Reducer 11 (PARTITION-LEVEL SORT, 492) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 186), Reducer 12 (PARTITION-LEVEL SORT, 186) - Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67) - Reducer 4 <- Map 20 (PARTITION-LEVEL SORT, 97), Reducer 3 (PARTITION-LEVEL SORT, 97) - Reducer 5 <- Reducer 4 (GROUP, 80) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 14 (PARTITION-LEVEL SORT, 306) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 338), Reducer 11 (PARTITION-LEVEL SORT, 338) + Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 452), Reducer 12 (PARTITION-LEVEL SORT, 452) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 97), Reducer 8 (PARTITION-LEVEL SORT, 97) + Reducer 3 <- Reducer 2 (GROUP, 80) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 496), Reducer 13 (PARTITION-LEVEL SORT, 496) + Reducer 7 <- Map 19 (PARTITION-LEVEL SORT, 181), Reducer 6 (PARTITION-LEVEL SORT, 181) + Reducer 8 <- Map 20 (PARTITION-LEVEL SORT, 199), Reducer 7 (PARTITION-LEVEL SORT, 199) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: inventory - filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: cr_item_sk is not null (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + predicate: cr_item_sk is not null (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5 - input vertices: - 1 Map 7 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: string) + expressions: cr_item_sk (type: int), cr_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 13 + Map 10 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + Execution mode: vectorized + Map 14 Map Operator Tree: TableScan alias: d1 @@ -197,7 +204,7 @@ STAGE PLANS: Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: customer_demographics @@ -216,7 +223,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 17 + Map 18 Map Operator Tree: TableScan alias: item @@ -236,26 +243,6 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 18 - Map Operator Tree: - TableScan - alias: d3 - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_date (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Map 19 Map Operator Tree: TableScan @@ -278,43 +265,72 @@ STAGE PLANS: Map 20 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: cr_item_sk is not null (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: d3 + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: cr_item_sk is not null (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_item_sk (type: int), cr_order_number (type: int) + expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 8 + Map 5 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: inventory + filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5 + input vertices: + 1 Map 9 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: string) Execution mode: vectorized - Reducer 10 + Local Work: + Map Reduce Local Work + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int) + Reducer 12 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -334,7 +350,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col9, _col10 input vertices: - 1 Map 15 + 1 Map 16 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -344,7 +360,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col16 input vertices: - 1 Map 16 + 1 Map 17 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int) @@ -352,7 +368,7 @@ STAGE PLANS: Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int) - Reducer 11 + Reducer 13 Reduce Operator Tree: Join Operator condition map: @@ -362,101 +378,43 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col16, _col18 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int), _col18 (type: string) - Reducer 12 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col16, _col18, _col20 - Statistics: Num rows: 510191624 Data size: 69090195216 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0D)) (type: boolean) - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: string), _col4 (type: int), _col6 (type: int), _col7 (type: int), _col10 (type: int), _col16 (type: int) - outputColumnNames: _col3, _col8, _col10, _col11, _col14, _col20 - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string), _col10 (type: int), _col11 (type: int), _col14 (type: int), _col20 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col8 (type: int) - outputColumnNames: _col0, _col3, _col5, _col9, _col14, _col16, _col17, _col20, _col26 - Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 < _col17) (type: boolean) - Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col20 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col20 (type: int) - Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col9 (type: string), _col14 (type: int), _col16 (type: int), _col26 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col20 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col5, _col9, _col14, _col16, _col20, _col26 - Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: int), _col16 (type: int), _col5 (type: string), _col9 (type: string), _col20 (type: int), _col26 (type: int) - outputColumnNames: _col4, _col6, _col13, _col15, _col22, _col28 - Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + expressions: _col18 (type: string), _col1 (type: int), _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int) + outputColumnNames: _col1, _col3, _col6, _col8, _col9, _col11, _col12, _col18 + Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int), _col6 (type: int) - sort order: ++ - Map-reduce partition columns: _col4 (type: int), _col6 (type: int) - Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE - value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col28 (type: int) - Reducer 4 + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: int), _col8 (type: int), _col9 (type: int), _col11 (type: string), _col12 (type: int), _col18 (type: int) + Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 keys: - 0 _col4 (type: int), _col6 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col13, _col15, _col22, _col28 - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int), _col1 (type: int) + 1 _col4 (type: int), _col6 (type: int) + outputColumnNames: _col15, _col17, _col24, _col30 + Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col15 (type: string), _col13 (type: string), _col22 (type: int), CASE WHEN (_col28 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col28 is not null) THEN (1) ELSE (0) END (type: int) + expressions: _col17 (type: string), _col15 (type: string), _col24 (type: int), CASE WHEN (_col30 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col30 is not null) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col3), count(_col4), count() keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672727 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -464,20 +422,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836295 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: -+++ - Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836295 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836295 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -488,22 +446,64 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col6 (type: int) + outputColumnNames: _col0, _col3, _col5, _col7, _col9, _col12, _col14, _col15, _col17, _col18, _col24 + Statistics: Num rows: 510191624 Data size: 69090195216 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 < _col15) (type: boolean) + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col18 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col18 (type: int) + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: string), _col7 (type: string), _col9 (type: int), _col12 (type: int), _col14 (type: int), _col17 (type: string), _col24 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col18 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col5, _col7, _col9, _col12, _col14, _col17, _col18, _col24 + Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col9 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int) + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: string), _col7 (type: string), _col12 (type: int), _col14 (type: int), _col17 (type: string), _col18 (type: int), _col24 (type: int) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col9 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7, _col12, _col14, _col17, _col18, _col24, _col28 + Statistics: Num rows: 205777295 Data size: 27866379834 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(_col28) > (UDFToDouble(_col17) + 5.0D)) (type: boolean) + Statistics: Num rows: 68592431 Data size: 9288793187 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col14 (type: int), _col5 (type: string), _col7 (type: string), _col18 (type: int), _col24 (type: int) + outputColumnNames: _col4, _col6, _col13, _col15, _col22, _col28 + Statistics: Num rows: 68592431 Data size: 9288793187 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int), _col6 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: int), _col6 (type: int) + Statistics: Num rows: 68592431 Data size: 9288793187 Basic stats: COMPLETE Column stats: NONE + value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col28 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index af4a835507..f5df4241ba 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -174,7 +174,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 14 + Map 11 Map Operator Tree: TableScan alias: web_page @@ -189,12 +189,12 @@ STAGE PLANS: Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 15 + Map 12 Map Operator Tree: TableScan alias: reason @@ -209,7 +209,7 @@ STAGE PLANS: Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col13 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -218,36 +218,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 52), Map 9 (PARTITION-LEVEL SORT, 52) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67) - Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 68), Reducer 3 (PARTITION-LEVEL SORT, 68) - Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 12), Reducer 4 (PARTITION-LEVEL SORT, 12) - Reducer 6 <- Map 13 (PARTITION-LEVEL SORT, 165), Reducer 5 (PARTITION-LEVEL SORT, 165) - Reducer 7 <- Reducer 6 (GROUP, 71) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 62), Map 9 (PARTITION-LEVEL SORT, 62) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 57), Reducer 2 (PARTITION-LEVEL SORT, 57) + Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 81), Reducer 3 (PARTITION-LEVEL SORT, 81) + Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 13), Reducer 4 (PARTITION-LEVEL SORT, 13) + Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 167), Reducer 5 (PARTITION-LEVEL SORT, 167) + Reducer 7 <- Reducer 6 (GROUP, 59) Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: web_sales - filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Execution mode: vectorized - Map 10 Map Operator Tree: TableScan alias: web_returns @@ -267,7 +247,26 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 11 + Map 10 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: cd1 @@ -287,7 +286,7 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 12 + Map 14 Map Operator Tree: TableScan alias: cd2 @@ -306,7 +305,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan alias: customer_address @@ -329,21 +328,22 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -351,114 +351,118 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + 0 _col0 (type: int), _col5 (type: int) + 1 _col1 (type: int), _col3 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14 Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col10 (type: int), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col5 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6, _col10, _col11, _col12, _col13, _col15, _col16 + 0 _col8 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col10, _col12, _col13, _col14 Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col10 (type: int) - sort order: + - Map-reduce partition columns: _col10 (type: int) - Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col12, _col13, _col14 + input vertices: + 1 Map 11 + Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19 + input vertices: + 1 Map 12 + Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)), _col19 (type: string) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6, _col11, _col12, _col13, _col15, _col16, _col18, _col19 - Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19, _col21, _col22 + Statistics: Num rows: 77305913 Data size: 10511369184 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200)) (type: boolean) - Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) (type: boolean) + Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col12 (type: int), _col18 (type: string), _col19 (type: string) + key expressions: _col3 (type: int), _col21 (type: string), _col22 (type: string) sort order: +++ - Map-reduce partition columns: _col12 (type: int), _col18 (type: string), _col19 (type: string) - Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) + Map-reduce partition columns: _col3 (type: int), _col21 (type: string), _col22 (type: string) + Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col12 (type: int), _col18 (type: string), _col19 (type: string) + 0 _col3 (type: int), _col21 (type: string), _col22 (type: string) 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col2, _col4, _col6, _col11, _col13, _col15, _col16 - Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col6, _col7, _col12, _col14, _col19 + Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col11 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col11 (type: int) - Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) Reducer 6 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col11 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col6, _col13, _col15, _col16, _col24 + outputColumnNames: _col6, _col7, _col12, _col14, _col19, _col27 Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col24 = 'KY') or (_col24 = 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250)) (type: boolean) + predicate: ((((_col27 = 'KY') or (_col27 = 'GA') or (_col27 = 'NM')) and _col14 BETWEEN 100 AND 200) or (((_col27 = 'MT') or (_col27 = 'OR') or (_col27 = 'IN')) and _col14 BETWEEN 150 AND 300) or (((_col27 = 'WI') or (_col27 = 'MO') or (_col27 = 'WV')) and _col14 BETWEEN 50 AND 250)) (type: boolean) Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col13, _col15, _col16 - input vertices: - 1 Map 14 - Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col13 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col15, _col16, _col28 - input vertices: - 1 Map 15 - Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col4), count(_col4), sum(_col16), count(_col16), sum(_col15), count(_col15) - keys: _col28 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) + Select Operator + expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col19 (type: string) + outputColumnNames: _col6, _col7, _col12, _col19 + Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6) + keys: _col19 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) Reducer 7 Execution mode: vectorized Reduce Operator Tree: @@ -467,15 +471,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(_col1) / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), substr(_col0, 1, 20) (type: string) outputColumnNames: _col4, _col5, _col6, _col7 - Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) sort order: ++++ - Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 8 Execution mode: vectorized @@ -483,7 +487,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index 6b3549216c..080b03a77b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -89,224 +89,225 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 18 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 7 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized + Reducer 6 vectorized File Output Operator [FS_270] Limit [LIM_269] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_268] (rows=421657640 width=88) + Select Operator [SEL_268] (rows=510205767 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 6 [SIMPLE_EDGE] vectorized + <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_267] - Select Operator [SEL_266] (rows=421657640 width=88) + Select Operator [SEL_266] (rows=510205767 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_265] (rows=421657640 width=88) + Group By Operator [GBY_265] (rows=510205767 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_50] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_49] (rows=843315281 width=88) + Group By Operator [GBY_48] (rows=1020411534 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_93] (rows=843315281 width=88) + Top N Key Operator [TNK_95] (rows=1020411534 width=88) keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_47] (rows=843315281 width=88) + Select Operator [SEL_46] (rows=1020411534 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_213] (rows=843315281 width=88) - Conds:RS_44._col3=RS_251._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] + Merge Join Operator [MERGEJOIN_207] (rows=1020411534 width=88) + Conds:RS_43._col11=RS_256._col0(Inner),Output:["_col3","_col7","_col15","_col20","_col24","_col25"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_256] PartitionCols:_col0 - Select Operator [SEL_250] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_249] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_32] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_212] (rows=766650239 width=88) - Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col7, _col8, _col9 - Merge Join Operator [MERGEJOIN_211] (rows=348467716 width=135) - Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] - <-Reducer 13 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_210] (rows=63350266 width=77) - Conds:RS_242._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_224] - PartitionCols:_col0 - Select Operator [SEL_219] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_216] (rows=73049 width=1119) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] - PartitionCols:_col0 - Select Operator [SEL_241] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_240] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_15] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_209] (rows=316788826 width=135) - Conds:RS_264._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_222] - PartitionCols:_col0 - Select Operator [SEL_218] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_215] (rows=73049 width=1119) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_264] - PartitionCols:_col0 - Select Operator [SEL_263] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_262] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_9] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_245] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_109] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_248] - Group By Operator [GBY_246] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_126] - Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_124] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Group By Operator [GBY_237] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_234] (rows=462000 width=1436) - Output:["_col0"] - Select Operator [SEL_232] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_231] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_260] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - Group By Operator [GBY_226] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] - <-Reducer 3 [SIMPLE_EDGE] + Select Operator [SEL_255] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_254] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_34] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col11 + Merge Join Operator [MERGEJOIN_206] (rows=927646829 width=88) + Conds:RS_40._col1, _col2=RS_41._col12, _col11(Inner),Output:["_col3","_col7","_col11","_col15","_col20"] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_41] - PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_208] (rows=696954748 width=88) - Conds:RS_38._col1=RS_233._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] + PartitionCols:_col12, _col11 + Select Operator [SEL_33] (rows=843315281 width=88) + Output:["_col1","_col5","_col9","_col11","_col12","_col14"] + Merge Join Operator [MERGEJOIN_205] (rows=843315281 width=88) + Conds:RS_30._col3=RS_248._col0(Inner),Output:["_col1","_col5","_col7","_col8","_col10","_col16"] + <-Map 19 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_248] + PartitionCols:_col0 + Select Operator [SEL_247] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_246] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_18] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_204] (rows=766650239 width=88) + Conds:RS_27._col6=RS_214._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col10"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=73049 width=1119) + Output:["_col0"] + Filter Operator [FIL_208] (rows=73049 width=1119) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_203] (rows=696954748 width=88) + Conds:RS_24._col0=RS_215._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_215] + PartitionCols:_col0 + Select Operator [SEL_211] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_209] (rows=36524 width=1119) + predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_202] (rows=633595212 width=88) + Conds:RS_264._col1, _col2, _col4=RS_228._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_228] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_227] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_226] (rows=57591150 width=77) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_9] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_264] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_263] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_262] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_22_store_returns_sr_customer_sk_min) AND DynamicValue(RS_22_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_22_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_store_returns_sr_item_sk_min) AND DynamicValue(RS_22_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_22_store_returns_sr_ticket_number_min) AND DynamicValue(RS_22_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_22_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_6] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_245] + Group By Operator [GBY_244] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + Group By Operator [GBY_218] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_216] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_211] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_235] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_229] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_227] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_236] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_230] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_227] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_243] + Group By Operator [GBY_242] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_237] + Group By Operator [GBY_234] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_231] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_227] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_253] + Group By Operator [GBY_252] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_251] + Group By Operator [GBY_250] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_249] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_247] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_259] + Group By Operator [GBY_258] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_257] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_255] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_201] (rows=316788826 width=135) + Conds:RS_225._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_232] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_207] (rows=633595212 width=88) - Conds:RS_259._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] - PartitionCols:_col0 - Select Operator [SEL_217] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_214] (rows=36524 width=1119) - predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] - PartitionCols:_col0 - Select Operator [SEL_258] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_257] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Please refer to the previous Group By Operator [GBY_243] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_247] - Please refer to the previous Group By Operator [GBY_246] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_238] - Please refer to the previous Group By Operator [GBY_237] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - Group By Operator [GBY_253] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_252] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_250] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_230] - Group By Operator [GBY_229] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_227] - Group By Operator [GBY_225] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_217] + Please refer to the previous Select Operator [SEL_210] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_224] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_223] (rows=287989836 width=135) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_38_d3_d_date_sk_min) AND DynamicValue(RS_38_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_219] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_213] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_210] diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 349d429ab3..d9024727dd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -100,32 +100,32 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE) -Map 31 <- Reducer 16 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) +Map 31 <- Reducer 19 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 30 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 11 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 31 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 27 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 21 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 30 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 25 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 20 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 21 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 24 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 27 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 30 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -139,13 +139,13 @@ Stage-0 predicate:(_col3 > _col4) Merge Join Operator [MERGEJOIN_290] (rows=231911707 width=321) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_380] Select Operator [SEL_379] (rows=1 width=232) Output:["_col0"] Group By Operator [GBY_378] (rows=1 width=232) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_377] Group By Operator [GBY_376] (rows=1 width=232) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] @@ -153,13 +153,13 @@ Stage-0 Output:["_col10"] Group By Operator [GBY_374] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Group By Operator [GBY_77] (rows=927646829 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col11, _col12, _col6, _col8, _col15, _col16, _col17, _col18, _col19, _col22 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col22, _col17, _col18, _col19, _col20, _col21, _col13, _col14, _col8, _col10 Merge Join Operator [MERGEJOIN_289] (rows=927646829 width=88) - Conds:RS_73._col9, _col13=RS_355._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col22"] + Conds:RS_73._col11, _col15=RS_355._col1, upper(_col2)(Inner),Output:["_col4","_col8","_col10","_col13","_col14","_col17","_col18","_col19","_col20","_col21","_col22"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_355] PartitionCols:_col1, upper(_col2) @@ -169,126 +169,126 @@ Stage-0 predicate:(ca_zip is not null and upper(ca_country) is not null) TableScan [TS_15] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_state","ca_zip","ca_country"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_73] - PartitionCols:_col9, _col13 + PartitionCols:_col11, _col15 Merge Join Operator [MERGEJOIN_288] (rows=843315281 width=88) - Conds:RS_70._col0, _col3=RS_334._col0, _col1(Inner),Output:["_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] - PartitionCols:_col0, _col1 - Select Operator [SEL_330] (rows=57591150 width=77) - Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=57591150 width=77) - predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 11 [SIMPLE_EDGE] + Conds:RS_70._col0=RS_317._col0(Inner),Output:["_col4","_col8","_col10","_col11","_col13","_col14","_col15","_col17","_col18","_col19","_col20","_col21"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_317] + PartitionCols:_col0 + Select Operator [SEL_314] (rows=462000 width=1436) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_312] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_70] - PartitionCols:_col0, _col3 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_287] (rows=766650239 width=88) - Conds:RS_67._col0=RS_297._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] + Conds:RS_67._col1=RS_341._col0(Inner),Output:["_col0","_col4","_col8","_col10","_col11","_col13","_col14","_col15"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_341] PartitionCols:_col0 - Select Operator [SEL_294] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_292] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] - <-Reducer 20 [SIMPLE_EDGE] + Select Operator [SEL_338] (rows=80000000 width=860) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_337] (rows=80000000 width=860) + predicate:(c_birth_country is not null and c_customer_sk is not null) + TableScan [TS_12] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_67] - PartitionCols:_col0 + PartitionCols:_col1 Merge Join Operator [MERGEJOIN_286] (rows=696954748 width=88) - Conds:RS_64._col1=RS_321._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + Conds:RS_64._col2=RS_329._col0(Inner),Output:["_col0","_col1","_col4","_col8","_col10","_col11"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] PartitionCols:_col0 - Select Operator [SEL_318] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_317] (rows=80000000 width=860) - predicate:(c_birth_country is not null and c_customer_sk is not null) - TableScan [TS_9] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 19 [SIMPLE_EDGE] + Select Operator [SEL_326] (rows=852 width=1910) + Output:["_col0","_col1","_col3","_col4"] + Filter Operator [FIL_325] (rows=852 width=1910) + predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) + TableScan [TS_9] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_64] - PartitionCols:_col1 + PartitionCols:_col2 Merge Join Operator [MERGEJOIN_285] (rows=633595212 width=88) - Conds:RS_373._col2=RS_309._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col8","_col9"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - PartitionCols:_col0 - Select Operator [SEL_306] (rows=852 width=1910) - Output:["_col0","_col1","_col3","_col4"] - Filter Operator [FIL_305] (rows=852 width=1910) - predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) - TableScan [TS_6] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + Conds:RS_373._col0, _col3=RS_296._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_296] + PartitionCols:_col0, _col1 + Select Operator [SEL_292] (rows=57591150 width=77) + Output:["_col0","_col1"] + Filter Operator [FIL_291] (rows=57591150 width=77) + predicate:(sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_3] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] <-Map 31 [SIMPLE_EDGE] vectorized SHUFFLE [RS_373] - PartitionCols:_col2 + PartitionCols:_col0, _col3 Select Operator [SEL_372] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_371] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_65_customer_c_customer_sk_min) AND DynamicValue(RS_65_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_65_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_68_item_i_item_sk_min) AND DynamicValue(RS_68_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_68_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_71_store_returns_sr_item_sk_min) AND DynamicValue(RS_71_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_71_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_62_store_s_store_sk_min) AND DynamicValue(RS_62_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_62_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_71_store_returns_sr_ticket_number_min) AND DynamicValue(RS_71_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_71_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_68_customer_c_customer_sk_min) AND DynamicValue(RS_68_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_68_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_62_store_returns_sr_item_sk_min) AND DynamicValue(RS_62_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_62_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_71_item_i_item_sk_min) AND DynamicValue(RS_71_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_71_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_65_store_s_store_sk_min) AND DynamicValue(RS_65_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_65_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_62_store_returns_sr_ticket_number_min) AND DynamicValue(RS_62_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_62_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_43] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Group By Operator [GBY_365] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_294] - <-Reducer 21 [BROADCAST_EDGE] vectorized + <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_362] Group By Operator [GBY_361] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] - Group By Operator [GBY_312] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_310] (rows=852 width=1910) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_305] + Group By Operator [GBY_301] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_297] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_306] - <-Reducer 24 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_292] + <-Reducer 20 [BROADCAST_EDGE] vectorized BROADCAST [RS_364] Group By Operator [GBY_363] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] - Group By Operator [GBY_324] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_322] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_318] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_368] - Group By Operator [GBY_367] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] - Group By Operator [GBY_339] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] + Group By Operator [GBY_302] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_335] (rows=57591150 width=77) + Select Operator [SEL_298] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] - <-Reducer 29 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_292] + <-Reducer 23 [BROADCAST_EDGE] vectorized BROADCAST [RS_370] Group By Operator [GBY_369] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] - Group By Operator [GBY_340] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_336] (rows=57591150 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + Group By Operator [GBY_320] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_318] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_314] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_366] + Group By Operator [GBY_365] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + Group By Operator [GBY_332] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_330] (rows=852 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_326] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_346] + Group By Operator [GBY_344] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_342] (rows=80000000 width=860) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_338] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_360] Select Operator [SEL_359] (rows=231911707 width=88) @@ -303,115 +303,115 @@ Stage-0 SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_34] (rows=927646829 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col17, _col18, _col12, _col22, _col6, _col7, _col9, _col10, _col14 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col19, _col20, _col14, _col22, _col8, _col9, _col11, _col12, _col16 Merge Join Operator [MERGEJOIN_284] (rows=927646829 width=88) - Conds:RS_30._col15, _col19=RS_354._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col17","_col18","_col22"] + Conds:RS_30._col17, _col21=RS_354._col1, upper(_col2)(Inner),Output:["_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col19","_col20","_col22"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_354] PartitionCols:_col1, upper(_col2) Please refer to the previous Select Operator [SEL_353] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] - PartitionCols:_col15, _col19 + PartitionCols:_col17, _col21 Merge Join Operator [MERGEJOIN_283] (rows=843315281 width=88) - Conds:RS_27._col0, _col3=RS_331._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_330] + Conds:RS_27._col1=RS_339._col0(Inner),Output:["_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col17","_col19","_col20","_col21"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_339] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_338] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] - PartitionCols:_col0, _col3 + PartitionCols:_col1 Merge Join Operator [MERGEJOIN_282] (rows=766650239 width=88) - Conds:RS_24._col1=RS_319._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + Conds:RS_24._col2=RS_327._col0(Inner),Output:["_col1","_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col17"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] - PartitionCols:_col1 + PartitionCols:_col2 Merge Join Operator [MERGEJOIN_281] (rows=696954748 width=88) - Conds:RS_21._col2=RS_307._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + Conds:RS_21._col0=RS_315._col0(Inner),Output:["_col1","_col2","_col4","_col8","_col9","_col11","_col12"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_306] + Select Operator [SEL_313] (rows=231000 width=1436) + Output:["_col0","_col1","_col2","_col4","_col5"] + Filter Operator [FIL_311] (rows=231000 width=1436) + predicate:((i_color = 'orchid') and i_item_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] - PartitionCols:_col2 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_280] (rows=633595212 width=88) - Conds:RS_351._col0=RS_295._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] + Conds:RS_351._col0, _col3=RS_293._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] - PartitionCols:_col0 - Select Operator [SEL_293] (rows=231000 width=1436) - Output:["_col0","_col1","_col2","_col4","_col5"] - Filter Operator [FIL_291] (rows=231000 width=1436) - predicate:((i_color = 'orchid') and i_item_sk is not null) - Please refer to the previous TableScan [TS_3] + PARTITION_ONLY_SHUFFLE [RS_293] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_292] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_351] - PartitionCols:_col0 + PartitionCols:_col0, _col3 Select Operator [SEL_350] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_349] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_25_customer_c_customer_sk_min) AND DynamicValue(RS_25_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_25_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_28_store_returns_sr_ticket_number_min) AND DynamicValue(RS_28_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_28_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_28_customer_c_customer_sk_min) AND DynamicValue(RS_28_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_28_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_19_store_returns_sr_item_sk_min) AND DynamicValue(RS_19_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_19_store_returns_sr_ticket_number_min) AND DynamicValue(RS_19_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_19_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_304] - Group By Operator [GBY_303] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + BROADCAST [RS_308] + Group By Operator [GBY_307] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] + PARTITION_ONLY_SHUFFLE [RS_303] Group By Operator [GBY_299] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=231000 width=1436) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_294] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_293] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_316] - Group By Operator [GBY_315] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_292] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_310] + Group By Operator [GBY_309] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] + Group By Operator [GBY_300] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_295] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_292] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_324] + Group By Operator [GBY_323] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] - Group By Operator [GBY_311] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] + Group By Operator [GBY_319] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_308] (rows=852 width=1910) + Select Operator [SEL_316] (rows=231000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_306] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_328] - Group By Operator [GBY_327] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] - Group By Operator [GBY_323] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_320] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_318] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_346] - Group By Operator [GBY_345] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_332] (rows=57591150 width=77) + Please refer to the previous Select Operator [SEL_313] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_336] + Group By Operator [GBY_335] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] + Group By Operator [GBY_331] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_328] (rows=852 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] - <-Reducer 27 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_326] + <-Reducer 28 [BROADCAST_EDGE] vectorized BROADCAST [RS_348] Group By Operator [GBY_347] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] - Group By Operator [GBY_338] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_333] (rows=57591150 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_345] + Group By Operator [GBY_343] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_340] (rows=80000000 width=860) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_338] diff --git a/ql/src/test/results/clientpositive/perf/tez/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out index 044df14792..2f2641811c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -95,220 +95,221 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 16 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) -Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 17 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 7 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_270] - Limit [LIM_269] (rows=100 width=88) + Reducer 6 vectorized + File Output Operator [FS_268] + Limit [LIM_267] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_268] (rows=421657640 width=88) + Select Operator [SEL_266] (rows=510205767 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_267] - Group By Operator [GBY_266] (rows=421657640 width=88) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_265] + Group By Operator [GBY_264] (rows=510205767 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_48] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_48] (rows=843315281 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","sum(_col20)","sum(_col12)"],keys:_col25, _col26, _col28, _col29 - Top N Key Operator [TNK_95] (rows=843315281 width=88) - keys:_col25, _col26, _col28, _col29,sort order:++++,top n:100 - Merge Join Operator [MERGEJOIN_214] (rows=843315281 width=88) - Conds:RS_44._col3=RS_252._col0(Inner),Output:["_col5","_col12","_col20","_col25","_col26","_col28","_col29"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_252] + Group By Operator [GBY_47] (rows=1020411534 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col18)","sum(_col23)","sum(_col3)"],keys:_col28, _col29, _col8, _col9 + Top N Key Operator [TNK_94] (rows=1020411534 width=88) + keys:_col28, _col29, _col8, _col9,sort order:++++,top n:100 + Merge Join Operator [MERGEJOIN_206] (rows=1020411534 width=88) + Conds:RS_43._col14=RS_255._col0(Inner),Output:["_col3","_col8","_col9","_col18","_col23","_col28","_col29"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] PartitionCols:_col0 - Select Operator [SEL_251] (rows=1704 width=1910) + Select Operator [SEL_254] (rows=462000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_250] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_32] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_213] (rows=766650239 width=88) - Conds:RS_41._col1=RS_243._col0(Inner),Output:["_col3","_col5","_col12","_col20","_col25","_col26"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] - PartitionCols:_col0 - Select Operator [SEL_242] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_241] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_29] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 3 [SIMPLE_EDGE] + Filter Operator [FIL_253] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_34] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col14 + Merge Join Operator [MERGEJOIN_205] (rows=927646829 width=88) + Conds:RS_40._col1, _col2=RS_41._col14, _col13(Inner),Output:["_col3","_col8","_col9","_col14","_col18","_col23"] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_212] (rows=696954748 width=88) - Conds:RS_38._col1, _col2, _col4=RS_39._col8, _col9, _col10(Inner),Output:["_col1","_col3","_col5","_col12","_col20"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col8, _col9, _col10 - Merge Join Operator [MERGEJOIN_211] (rows=348467716 width=135) - Conds:RS_25._col2, _col1=RS_26._col1, _col2(Inner),Output:["_col3","_col8","_col9","_col10","_col11"] - <-Reducer 13 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_26] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_210] (rows=63350266 width=77) - Conds:RS_234._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_225] - PartitionCols:_col0 - Select Operator [SEL_220] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_217] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - PartitionCols:_col0 - Select Operator [SEL_233] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_232] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_209] (rows=316788826 width=135) - Conds:RS_265._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_223] - PartitionCols:_col0 - Select Operator [SEL_219] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_216] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) - Please refer to the previous TableScan [TS_3] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] - PartitionCols:_col0 - Select Operator [SEL_264] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_263] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_6] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_237] - Group By Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_106] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_240] - Group By Operator [GBY_238] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_123] - Group By Operator [GBY_122] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_121] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_249] - Group By Operator [GBY_247] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_246] - Group By Operator [GBY_245] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_244] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_242] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_262] - Group By Operator [GBY_261] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_229] - Group By Operator [GBY_227] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_224] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_208] (rows=633595212 width=88) - Conds:RS_260._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_221] - PartitionCols:_col0 - Select Operator [SEL_218] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_215] (rows=18262 width=1119) - predicate:((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_260] - PartitionCols:_col0 - Select Operator [SEL_259] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_258] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Please refer to the previous Group By Operator [GBY_235] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Please refer to the previous Group By Operator [GBY_238] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_248] - Please refer to the previous Group By Operator [GBY_247] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_257] - Group By Operator [GBY_256] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] - Group By Operator [GBY_254] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_253] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_251] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_230] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - Group By Operator [GBY_226] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] + PartitionCols:_col14, _col13 + Select Operator [SEL_33] (rows=843315281 width=88) + Output:["_col1","_col2","_col7","_col11","_col13","_col14","_col16"] + Merge Join Operator [MERGEJOIN_204] (rows=843315281 width=88) + Conds:RS_30._col3=RS_247._col0(Inner),Output:["_col1","_col5","_col7","_col8","_col10","_col18","_col19"] + <-Map 19 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_247] + PartitionCols:_col0 + Select Operator [SEL_246] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_245] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_18] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_203] (rows=766650239 width=88) + Conds:RS_27._col6=RS_213._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col10"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_207] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_202] (rows=696954748 width=88) + Conds:RS_24._col0=RS_214._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_208] (rows=18262 width=1119) + predicate:((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_201] (rows=633595212 width=88) + Conds:RS_263._col1, _col2, _col4=RS_227._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_227] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_226] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_225] (rows=57591150 width=77) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_9] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_263] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_262] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_261] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_22_store_returns_sr_customer_sk_min) AND DynamicValue(RS_22_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_22_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_store_returns_sr_item_sk_min) AND DynamicValue(RS_22_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_22_store_returns_sr_ticket_number_min) AND DynamicValue(RS_22_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_22_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_6] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_244] + Group By Operator [GBY_243] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_219] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_215] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_210] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_234] + Group By Operator [GBY_231] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_228] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_226] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_240] + Group By Operator [GBY_239] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_235] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_229] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_226] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_242] + Group By Operator [GBY_241] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_236] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_230] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_226] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_252] + Group By Operator [GBY_251] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_250] + Group By Operator [GBY_249] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_248] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_246] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_260] + Group By Operator [GBY_259] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_258] + Group By Operator [GBY_257] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_256] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_254] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_200] (rows=316788826 width=135) + Conds:RS_224._col0=RS_211._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_211] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_209] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col0 + Select Operator [SEL_223] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_222] (rows=287989836 width=135) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_38_d3_d_date_sk_min) AND DynamicValue(RS_38_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_221] + Group By Operator [GBY_220] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] + Group By Operator [GBY_216] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_212] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_209] diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out index 0f019d54de..d6385c88f7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -93,226 +93,226 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 7 (BROADCAST_EDGE) -Map 8 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) -Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 20 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 9 <- Reducer 15 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_260] - Limit [LIM_259] (rows=100 width=88) + Reducer 6 vectorized + File Output Operator [FS_270] + Limit [LIM_269] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_258] (rows=463823414 width=88) + Select Operator [SEL_268] (rows=510205767 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] - Group By Operator [GBY_256] (rows=463823414 width=88) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_267] + Group By Operator [GBY_266] (rows=510205767 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_48] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_48] (rows=927646829 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7, _col8, _col27, _col28 - Top N Key Operator [TNK_93] (rows=927646829 width=88) - keys:_col7, _col8, _col27, _col28,sort order:++++,top n:100 - Merge Join Operator [MERGEJOIN_205] (rows=927646829 width=88) - Conds:RS_44._col1, _col2=RS_45._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col14, _col13 - Select Operator [SEL_40] (rows=843315281 width=88) - Output:["_col1","_col2","_col8","_col13","_col14","_col16","_col21","_col22"] - Merge Join Operator [MERGEJOIN_204] (rows=843315281 width=88) - Conds:RS_37._col3=RS_247._col0(Inner),Output:["_col5","_col10","_col11","_col13","_col18","_col19","_col21","_col22"] - <-Map 22 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_247] - PartitionCols:_col0 - Select Operator [SEL_246] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_245] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_25] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_203] (rows=766650239 width=88) - Conds:RS_34._col1=RS_239._col0(Inner),Output:["_col3","_col5","_col10","_col11","_col13","_col18","_col19"] + Group By Operator [GBY_47] (rows=1020411534 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col17)","sum(_col22)","sum(_col3)"],keys:_col27, _col28, _col7, _col8 + Top N Key Operator [TNK_94] (rows=1020411534 width=88) + keys:_col27, _col28, _col7, _col8,sort order:++++,top n:100 + Merge Join Operator [MERGEJOIN_206] (rows=1020411534 width=88) + Conds:RS_43._col13=RS_257._col0(Inner),Output:["_col3","_col7","_col8","_col17","_col22","_col27","_col28"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] + PartitionCols:_col0 + Select Operator [SEL_256] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_255] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_34] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col13 + Merge Join Operator [MERGEJOIN_205] (rows=927646829 width=88) + Conds:RS_40._col1, _col2=RS_41._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col13","_col17","_col22"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col14, _col13 + Select Operator [SEL_33] (rows=843315281 width=88) + Output:["_col1","_col2","_col7","_col11","_col13","_col14","_col16"] + Merge Join Operator [MERGEJOIN_204] (rows=843315281 width=88) + Conds:RS_30._col3=RS_249._col0(Inner),Output:["_col1","_col5","_col7","_col8","_col10","_col18","_col19"] <-Map 20 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_239] + PARTITION_ONLY_SHUFFLE [RS_249] PartitionCols:_col0 - Select Operator [SEL_238] (rows=462000 width=1436) + Select Operator [SEL_248] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_237] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_22] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_202] (rows=696954748 width=88) - Conds:RS_31._col1, _col2, _col4=RS_32._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col10","_col11","_col13"] - <-Reducer 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_32] - PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_201] (rows=63350266 width=77) - Conds:RS_230._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] + Filter Operator [FIL_247] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_18] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_203] (rows=766650239 width=88) + Conds:RS_27._col6=RS_240._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col10"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_240] + PartitionCols:_col0 + Select Operator [SEL_238] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_236] (rows=36524 width=1119) + predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) + TableScan [TS_15] (rows=73049 width=1119) + default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_202] (rows=696954748 width=88) + Conds:RS_24._col0=RS_241._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_241] PartitionCols:_col0 - Select Operator [SEL_220] (rows=36524 width=1119) + Select Operator [SEL_239] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_218] (rows=36524 width=1119) - predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] - PartitionCols:_col0 - Select Operator [SEL_229] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_228] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_200] (rows=633595212 width=88) - Conds:RS_255._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] - PartitionCols:_col0 - Select Operator [SEL_219] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_217] (rows=18262 width=1119) + Filter Operator [FIL_237] (rows=18262 width=1119) predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) - Please refer to the previous TableScan [TS_9] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] + Please refer to the previous TableScan [TS_15] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0 - Select Operator [SEL_254] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_253] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_32_store_returns_sr_customer_sk_min) AND DynamicValue(RS_32_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_32_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_32_store_returns_sr_item_sk_min) AND DynamicValue(RS_32_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_32_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_35_item_i_item_sk_min) AND DynamicValue(RS_35_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_35_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_38_store_s_store_sk_min) AND DynamicValue(RS_38_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_38_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_32_store_returns_sr_ticket_number_min) AND DynamicValue(RS_32_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_32_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_227] - Group By Operator [GBY_226] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_232] - Group By Operator [GBY_231] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_121] - Group By Operator [GBY_120] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_119] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_201] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_234] - Group By Operator [GBY_233] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_126] - Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_124] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_201] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_129] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_201] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_242] - Group By Operator [GBY_241] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_240] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_238] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_252] - Group By Operator [GBY_251] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_250] - Group By Operator [GBY_249] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_248] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_246] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_199] (rows=316788826 width=135) - Conds:RS_216._col0=RS_208._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_208] - PartitionCols:_col0 - Select Operator [SEL_207] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_206] (rows=73049 width=1119) - predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] - PartitionCols:_col0 - Select Operator [SEL_215] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_214] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_211] - Group By Operator [GBY_210] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_209] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_207] + Merge Join Operator [MERGEJOIN_201] (rows=633595212 width=88) + Conds:RS_265._col1, _col2, _col4=RS_220._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_220] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_219] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_218] (rows=57591150 width=77) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_9] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_265] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_264] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_263] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_22_store_returns_sr_customer_sk_min) AND DynamicValue(RS_22_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_22_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_22_store_returns_sr_item_sk_min) AND DynamicValue(RS_22_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_22_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_22_store_returns_sr_ticket_number_min) AND DynamicValue(RS_22_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_22_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_6] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_227] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_221] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_219] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_228] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_222] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_219] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_229] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_223] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_219] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_246] + Group By Operator [GBY_245] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_244] + Group By Operator [GBY_243] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_242] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_239] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_254] + Group By Operator [GBY_253] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_252] + Group By Operator [GBY_251] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_250] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_248] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_262] + Group By Operator [GBY_261] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_260] + Group By Operator [GBY_259] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_258] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_256] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_200] (rows=316788826 width=135) + Conds:RS_217._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=73049 width=1119) + Output:["_col0"] + Filter Operator [FIL_207] (rows=73049 width=1119) + predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_215] (rows=287989836 width=135) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_38_d3_d_date_sk_min) AND DynamicValue(RS_38_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_210] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_208] diff --git a/ql/src/test/results/clientpositive/perf/tez/query50.q.out b/ql/src/test/results/clientpositive/perf/tez/query50.q.out index efbae5cbf3..cdbaad7e06 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query50.q.out @@ -117,157 +117,157 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Reducer 10 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 8 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_156] - Limit [LIM_155] (rows=100 width=88) + File Output Operator [FS_165] + Limit [LIM_164] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_154] (rows=383325119 width=88) + Select Operator [SEL_163] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] - Group By Operator [GBY_152] (rows=383325119 width=88) + SHUFFLE [RS_162] + Group By Operator [GBY_161] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_29] (rows=766650239 width=88) + Group By Operator [GBY_29] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Top N Key Operator [TNK_56] (rows=766650239 width=88) + Top N Key Operator [TNK_56] (rows=843315281 width=88) keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,sort order:++++++++++,top n:100 - Select Operator [SEL_27] (rows=766650239 width=88) + Select Operator [SEL_27] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_120] (rows=766650239 width=88) - Conds:RS_24._col10=RS_143._col0(Inner),Output:["_col0","_col7","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + Merge Join Operator [MERGEJOIN_120] (rows=843315281 width=88) + Conds:RS_24._col3=RS_149._col0(Inner),Output:["_col0","_col5","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_142] (rows=1704 width=1910) + Select Operator [SEL_148] (rows=1704 width=1910) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_141] (rows=1704 width=1910) + Filter Operator [FIL_147] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_12] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_119] (rows=696954748 width=88) - Conds:RS_21._col7=RS_135._col0(Inner),Output:["_col0","_col7","_col10"] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_119] (rows=766650239 width=88) + Conds:RS_21._col0=RS_141._col0(Inner),Output:["_col0","_col3","_col5"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_134] (rows=73049 width=1119) + Select Operator [SEL_140] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_133] (rows=73049 width=1119) + Filter Operator [FIL_139] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_9] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_118] (rows=633595212 width=88) - Conds:RS_18._col1, _col2, _col3=RS_151._col1, _col2, _col4(Inner),Output:["_col0","_col7","_col10"] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_118] (rows=696954748 width=88) + Conds:RS_18._col5=RS_160._col0(Inner),Output:["_col0","_col3","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_160] + PartitionCols:_col0 + Select Operator [SEL_159] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_158] (rows=18262 width=1119) + predicate:((d_moy = 9) and (d_year = 2000) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_18] - PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_117] (rows=63350266 width=77) - Conds:RS_123._col0=RS_126._col0(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - PartitionCols:_col0 + SHUFFLE [RS_18] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_117] (rows=633595212 width=88) + Conds:RS_157._col1, _col2, _col4=RS_123._col1, _col2, _col3(Inner),Output:["_col0","_col3","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_123] + PartitionCols:_col1, _col2, _col3 Select Operator [SEL_122] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_121] (rows=57591150 width=77) predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_0] (rows=57591150 width=77) + TableScan [TS_3] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] - PartitionCols:_col0 - Select Operator [SEL_125] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_124] (rows=18262 width=1119) - predicate:((d_moy = 9) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_150] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_149] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_18_store_returns_sr_customer_sk_min) AND DynamicValue(RS_18_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_18_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_18_store_returns_sr_item_sk_min) AND DynamicValue(RS_18_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_d1_d_date_sk_min) AND DynamicValue(RS_22_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_18_store_returns_sr_ticket_number_min) AND DynamicValue(RS_18_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_18_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_94] - Group By Operator [GBY_93] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_92] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_140] - Group By Operator [GBY_139] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_157] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_156] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_155] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_16_store_returns_sr_customer_sk_min) AND DynamicValue(RS_16_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_16_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_16_store_returns_sr_item_sk_min) AND DynamicValue(RS_16_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_16_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_d1_d_date_sk_min) AND DynamicValue(RS_22_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_16_store_returns_sr_ticket_number_min) AND DynamicValue(RS_16_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_16_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_136] + Group By Operator [GBY_135] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_131] + Group By Operator [GBY_128] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_125] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_122] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_138] Group By Operator [GBY_137] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_136] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_134] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_148] - Group By Operator [GBY_147] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_129] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_126] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_122] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_146] Group By Operator [GBY_145] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_144] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_142] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_84] - Group By Operator [GBY_83] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_82] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_89] - Group By Operator [GBY_88] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_87] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_142] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_140] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + Group By Operator [GBY_151] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_150] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_148] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_130] + Group By Operator [GBY_127] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_124] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_122] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index c889ce153f..e48e27e3ea 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[280][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 9' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -115,321 +115,324 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 17 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Union 18 (CONTAINS) -Map 23 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 14 <- Reducer 22 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 15 (CONTAINS) +Map 20 <- Reducer 22 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Union 15 (CONTAINS) +Map 27 <- Reducer 26 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 19 <- Map 24 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 16 <- Map 21 (SIMPLE_EDGE), Union 15 (SIMPLE_EDGE) +Reducer 17 <- Map 23 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 25 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 28 (SIMPLE_EDGE) +Reducer 3 <- Reducer 19 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 28 (SIMPLE_EDGE) Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 30 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 30 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 33 <- Map 28 (SIMPLE_EDGE) +Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 27 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 34 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_360] - Limit [LIM_359] (rows=100 width=158) + Reducer 12 vectorized + File Output Operator [FS_371] + Limit [LIM_370] (rows=100 width=158) Number of rows:100 - Select Operator [SEL_358] (rows=1614130953450400 width=158) + Select Operator [SEL_369] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_357] - Select Operator [SEL_356] (rows=1614130953450400 width=158) + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_368] + Select Operator [SEL_367] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_355] (rows=1614130953450400 width=158) + Group By Operator [GBY_366] (rows=1614130953450400 width=158) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_365] PartitionCols:_col0 - Group By Operator [GBY_353] (rows=3228261906900801 width=158) + Group By Operator [GBY_364] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_352] (rows=3228261906900801 width=158) + Select Operator [SEL_363] (rows=3228261906900801 width=158) Output:["_col0"] - Group By Operator [GBY_351] (rows=3228261906900801 width=158) + Group By Operator [GBY_362] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_119] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_118] PartitionCols:_col0 - Group By Operator [GBY_118] (rows=6456523813801603 width=158) + Group By Operator [GBY_117] (rows=6456523813801603 width=158) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_117] (rows=6456523813801603 width=158) + Select Operator [SEL_116] (rows=6456523813801603 width=158) Output:["_col0","_col1"] - Filter Operator [FIL_116] (rows=6456523813801603 width=158) + Filter Operator [FIL_115] (rows=6456523813801603 width=158) predicate:_col2 BETWEEN _col3 AND _col4 - Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158) + Merge Join Operator [MERGEJOIN_281] (rows=58108714324214428 width=158) Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_350] - Group By Operator [GBY_349] (rows=9131 width=1119) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_361] + Group By Operator [GBY_360] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_342] PartitionCols:_col0 - Group By Operator [GBY_328] (rows=18262 width=1119) + Group By Operator [GBY_339] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_325] (rows=18262 width=1119) + Select Operator [SEL_336] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_322] (rows=18262 width=1119) + Filter Operator [FIL_333] (rows=18262 width=1119) predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_50] (rows=73049 width=1119) + TableScan [TS_40] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_113] - Select Operator [SEL_104] (rows=6363893803988 width=1226) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_112] + Select Operator [SEL_103] (rows=6363893803988 width=1226) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1226) + Merge Join Operator [MERGEJOIN_280] (rows=6363893803988 width=1226) Conds:(Inner),Output:["_col0","_col4","_col11","_col13"] - <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_348] - Select Operator [SEL_347] (rows=1 width=8) - Filter Operator [FIL_346] (rows=1 width=8) + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_359] + Select Operator [SEL_358] (rows=1 width=8) + Filter Operator [FIL_357] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_345] (rows=1 width=8) + Group By Operator [GBY_356] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_344] - Group By Operator [GBY_343] (rows=1 width=8) + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_355] + Group By Operator [GBY_354] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_342] (rows=9131 width=1119) - Group By Operator [GBY_341] (rows=9131 width=1119) + Select Operator [SEL_353] (rows=9131 width=1119) + Group By Operator [GBY_352] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_341] PartitionCols:_col0 - Group By Operator [GBY_327] (rows=18262 width=1119) + Group By Operator [GBY_338] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_324] (rows=18262 width=1119) + Select Operator [SEL_335] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_322] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_101] - Select Operator [SEL_85] (rows=6363893803988 width=1217) + Please refer to the previous Filter Operator [FIL_333] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_100] + Select Operator [SEL_84] (rows=6363893803988 width=1217) Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_271] (rows=6363893803988 width=1217) - Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - Group By Operator [GBY_332] (rows=9131 width=1119) + Merge Join Operator [MERGEJOIN_279] (rows=6363893803988 width=1217) + Conds:(Left Outer),Output:["_col5","_col9","_col12","_col13"] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_345] + Group By Operator [GBY_343] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_340] PartitionCols:_col0 - Group By Operator [GBY_326] (rows=18262 width=1119) + Group By Operator [GBY_337] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_323] (rows=18262 width=1119) + Select Operator [SEL_334] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_322] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_82] - Merge Join Operator [MERGEJOIN_270] (rows=696954748 width=97) - Conds:(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_79] - Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88) - Conds:RS_76._col1=RS_77._col5(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_77] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135) - Conds:RS_46._col0=RS_321._col1(Inner),Output:["_col5"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014) - Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] - PartitionCols:_col1, _col2 - Select Operator [SEL_296] (rows=40000000 width=1014) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_295] (rows=40000000 width=1014) - predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] - PartitionCols:_col0, _col1 - Select Operator [SEL_299] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_298] (rows=1704 width=1910) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + Please refer to the previous Filter Operator [FIL_333] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_81] + Merge Join Operator [MERGEJOIN_278] (rows=696954748 width=97) + Conds:RS_78._col7=RS_324._col0(Inner),Output:["_col5","_col9","_col12"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] + PartitionCols:_col0 + Select Operator [SEL_323] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_322] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_55] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_277] (rows=633595212 width=97) + Conds:(Inner),Output:["_col5","_col7","_col9"] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_351] + Select Operator [SEL_350] (rows=1 width=8) + Filter Operator [FIL_349] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_348] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_346] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_344] (rows=9131 width=1119) + Please refer to the previous Group By Operator [GBY_343] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_75] + Merge Join Operator [MERGEJOIN_276] (rows=633595212 width=88) + Conds:RS_72._col5=RS_332._col1(Inner),Output:["_col5","_col7","_col9"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] PartitionCols:_col1 - Select Operator [SEL_320] (rows=287491029 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_319] (rows=287491029 width=135) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col0, _col1 - Group By Operator [GBY_39] (rows=574982058 width=135) - Output:["_col0","_col1"],keys:_col10, _col9 - Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135) - Conds:RS_35._col1=RS_315._col0(Inner),Output:["_col9","_col10"] - <-Map 28 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_315] - PartitionCols:_col0 - Select Operator [SEL_314] (rows=80000000 width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_313] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_26] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135) - Conds:RS_32._col2=RS_309._col0(Inner),Output:["_col1"] - <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_309] - PartitionCols:_col0 - Select Operator [SEL_308] (rows=115500 width=1436) - Output:["_col0"] - Filter Operator [FIL_307] (rows=115500 width=1436) - predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) - TableScan [TS_23] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135) - Conds:Union 18._col0=RS_303._col0(Inner),Output:["_col1","_col2"] - <-Map 24 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] - PartitionCols:_col0 - Select Operator [SEL_302] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_301] (rows=18262 width=1119) - predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_20] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Union 18 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] vectorized - Reduce Output Operator [RS_371] - PartitionCols:_col0 - Select Operator [SEL_370] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_369] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_36_customer_c_customer_sk_min) AND DynamicValue(RS_36_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_36_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_274] (rows=287989836 width=135) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_362] - Group By Operator [GBY_361] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_304] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_302] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_365] - Group By Operator [GBY_364] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_312] - Group By Operator [GBY_311] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_310] (rows=115500 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_308] - <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_368] - Group By Operator [GBY_367] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_318] - Group By Operator [GBY_317] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_316] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_314] - <-Map 23 [CONTAINS] vectorized - Reduce Output Operator [RS_374] - PartitionCols:_col0 - Select Operator [SEL_373] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_372] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_279] (rows=144002668 width=135) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_363] - Please refer to the previous Group By Operator [GBY_361] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Please refer to the previous Group By Operator [GBY_364] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88) - Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] - PartitionCols:_col0 - Select Operator [SEL_285] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_284] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] - PartitionCols:_col0 - Select Operator [SEL_293] (rows=575995635 width=88) + Select Operator [SEL_331] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_292] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) + Filter Operator [FIL_330] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_30_customer_c_customer_sk_min) AND DynamicValue(RS_30_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_30_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_79_date_dim_d_date_sk_min) AND DynamicValue(RS_79_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_79_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_37] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_291] - Group By Operator [GBY_290] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_321] + Group By Operator [GBY_319] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_315] + Group By Operator [GBY_314] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] + Select Operator [SEL_313] (rows=80000000 width=860) + Output:["_col0"] + Select Operator [SEL_311] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_310] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_20] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 35 [BROADCAST_EDGE] vectorized + BROADCAST [RS_329] + Group By Operator [GBY_328] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] - Group By Operator [GBY_288] (rows=1 width=12) + <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] + Group By Operator [GBY_326] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_287] (rows=73049 width=1119) + Select Operator [SEL_325] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_285] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Select Operator [SEL_339] (rows=1 width=8) - Filter Operator [FIL_338] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_337] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - Group By Operator [GBY_335] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_333] (rows=9131 width=1119) - Please refer to the previous Group By Operator [GBY_332] + Please refer to the previous Select Operator [SEL_323] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_275] (rows=316240138 width=135) + Conds:RS_69._col0=RS_318._col1(Inner),Output:["_col5"] + <-Reducer 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col1 + Select Operator [SEL_317] (rows=287491029 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_316] (rows=287491029 width=135) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0, _col1 + Group By Operator [GBY_33] (rows=574982058 width=135) + Output:["_col0","_col1"],keys:_col10, _col9 + Merge Join Operator [MERGEJOIN_274] (rows=574982058 width=135) + Conds:RS_29._col1=RS_312._col0(Inner),Output:["_col9","_col10"] + <-Map 25 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_312] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_311] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_273] (rows=522710951 width=135) + Conds:RS_26._col2=RS_306._col0(Inner),Output:["_col1"] + <-Map 23 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] + PartitionCols:_col0 + Select Operator [SEL_305] (rows=115500 width=1436) + Output:["_col0"] + Filter Operator [FIL_304] (rows=115500 width=1436) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) + TableScan [TS_17] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_272] (rows=475191764 width=135) + Conds:Union 15._col0=RS_300._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_300] + PartitionCols:_col0 + Select Operator [SEL_299] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_298] (rows=18262 width=1119) + predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_14] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Union 15 [SIMPLE_EDGE] + <-Map 14 [CONTAINS] vectorized + Reduce Output Operator [RS_380] + PartitionCols:_col0 + Select Operator [SEL_379] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_378] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_30_customer_c_customer_sk_min) AND DynamicValue(RS_30_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_30_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_27_item_i_item_sk_min) AND DynamicValue(RS_27_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_27_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_24_date_dim_d_date_sk_min) AND DynamicValue(RS_24_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_24_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_282] (rows=287989836 width=135) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_373] + Group By Operator [GBY_372] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_303] + Group By Operator [GBY_302] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_301] (rows=18262 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_299] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_376] + Group By Operator [GBY_375] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_309] + Group By Operator [GBY_308] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_307] (rows=115500 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_305] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_320] + Please refer to the previous Group By Operator [GBY_319] + <-Map 20 [CONTAINS] vectorized + Reduce Output Operator [RS_383] + PartitionCols:_col0 + Select Operator [SEL_382] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_381] (rows=144002668 width=135) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_27_item_i_item_sk_min) AND DynamicValue(RS_27_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_27_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_24_date_dim_d_date_sk_min) AND DynamicValue(RS_24_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_24_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_287] (rows=144002668 width=135) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_374] + Please refer to the previous Group By Operator [GBY_372] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_377] + Please refer to the previous Group By Operator [GBY_375] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_271] (rows=44000000 width=1014) + Conds:RS_294._col1, _col2=RS_297._col0, _col1(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] + PartitionCols:_col1, _col2 + Select Operator [SEL_293] (rows=40000000 width=1014) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_292] (rows=40000000 width=1014) + predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) + TableScan [TS_0] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] + PartitionCols:_col0, _col1 + Select Operator [SEL_296] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_295] (rows=1704 width=1910) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_3] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index e3eea5886c..6b11fc2641 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -237,64 +237,64 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 40 <- Reducer 23 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 62 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE), Reducer 70 (BROADCAST_EDGE) -Map 49 <- Reducer 43 (BROADCAST_EDGE), Reducer 54 (BROADCAST_EDGE), Reducer 55 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) -Map 72 <- Reducer 31 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE), Reducer 39 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE), Reducer 68 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) -Map 73 <- Reducer 45 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE) +Map 38 <- Reducer 22 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 57 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE), Reducer 70 (BROADCAST_EDGE) +Map 54 <- Reducer 42 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE) +Map 72 <- Reducer 29 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE), Reducer 37 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 49 (BROADCAST_EDGE), Reducer 50 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE), Reducer 68 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) +Map 73 <- Reducer 45 (BROADCAST_EDGE), Reducer 49 (BROADCAST_EDGE), Reducer 64 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 30 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 12 <- Reducer 28 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 13 <- Map 69 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 17 <- Map 46 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 51 (ONE_TO_ONE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) +Reducer 17 <- Map 51 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 56 (ONE_TO_ONE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 20 <- Map 61 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 37 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 64 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 15 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) -Reducer 25 <- Map 46 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 57 (ONE_TO_ONE_EDGE) -Reducer 28 <- Map 61 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Map 37 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 20 <- Map 66 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 35 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 15 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) +Reducer 24 <- Map 51 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 62 (ONE_TO_ONE_EDGE) +Reducer 27 <- Map 66 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Map 35 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 15 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 64 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 38 <- Map 37 (CUSTOM_SIMPLE_EDGE) -Reducer 39 <- Map 37 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) -Reducer 43 <- Map 42 (CUSTOM_SIMPLE_EDGE) -Reducer 44 <- Map 42 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) -Reducer 45 <- Map 42 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) +Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Map 35 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 35 (CUSTOM_SIMPLE_EDGE) +Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 40 <- Map 46 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) +Reducer 42 <- Map 41 (CUSTOM_SIMPLE_EDGE) +Reducer 43 <- Map 41 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) +Reducer 44 <- Map 46 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE) +Reducer 45 <- Map 41 (CUSTOM_SIMPLE_EDGE) Reducer 47 <- Map 46 (CUSTOM_SIMPLE_EDGE) Reducer 48 <- Map 46 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 37 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Map 49 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) -Reducer 51 <- Reducer 50 (SIMPLE_EDGE) -Reducer 52 <- Reducer 51 (CUSTOM_SIMPLE_EDGE) -Reducer 54 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 55 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 56 <- Map 53 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) -Reducer 57 <- Reducer 56 (SIMPLE_EDGE) -Reducer 58 <- Reducer 57 (CUSTOM_SIMPLE_EDGE) -Reducer 59 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 49 <- Map 46 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 35 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 50 <- Map 46 (CUSTOM_SIMPLE_EDGE) +Reducer 52 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 55 <- Map 54 (SIMPLE_EDGE), Map 58 (SIMPLE_EDGE) +Reducer 56 <- Reducer 55 (SIMPLE_EDGE) +Reducer 57 <- Reducer 56 (CUSTOM_SIMPLE_EDGE) +Reducer 59 <- Map 58 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 69 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 60 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 62 <- Map 61 (CUSTOM_SIMPLE_EDGE) -Reducer 63 <- Map 61 (CUSTOM_SIMPLE_EDGE) -Reducer 65 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 66 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 67 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 68 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 60 <- Map 58 (CUSTOM_SIMPLE_EDGE) +Reducer 61 <- Map 58 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) +Reducer 62 <- Reducer 61 (SIMPLE_EDGE) +Reducer 63 <- Reducer 62 (CUSTOM_SIMPLE_EDGE) +Reducer 64 <- Map 58 (CUSTOM_SIMPLE_EDGE) +Reducer 65 <- Map 58 (CUSTOM_SIMPLE_EDGE) +Reducer 67 <- Map 66 (CUSTOM_SIMPLE_EDGE) +Reducer 68 <- Map 66 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 70 <- Map 69 (CUSTOM_SIMPLE_EDGE) Reducer 71 <- Map 69 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 69 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) @@ -305,8 +305,8 @@ Stage-0 limit:-1 Stage-1 Reducer 11 vectorized - File Output Operator [FS_1283] - Select Operator [SEL_1282] (rows=273897192 width=88) + File Output Operator [FS_1281] + Select Operator [SEL_1280] (rows=273897192 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_259] @@ -314,688 +314,688 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] Filter Operator [FIL_257] (rows=273897192 width=88) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_1087] (rows=821691577 width=88) - Conds:RS_1239._col2, _col1, _col3=RS_1281._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + Merge Join Operator [MERGEJOIN_1085] (rows=821691577 width=88) + Conds:RS_1237._col2, _col1, _col3=RS_1279._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1281] + SHUFFLE [RS_1279] PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_1280] (rows=746992327 width=88) + Select Operator [SEL_1278] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_1279] (rows=746992327 width=88) + Group By Operator [GBY_1277] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_251] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Group By Operator [GBY_250] (rows=1493984654 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col49 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col45)","sum(_col46)","sum(_col47)"],keys:_col26, _col48, _col27, _col7, _col9, _col14, _col15, _col16, _col17, _col21, _col22, _col23, _col24, _col51 Select Operator [SEL_249] (rows=1493984654 width=88) - Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] + Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51"] Filter Operator [FIL_248] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1086] (rows=1493984654 width=88) - Conds:RS_245._col37=RS_1129._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] + Merge Join Operator [MERGEJOIN_1084] (rows=1493984654 width=88) + Conds:RS_245._col39=RS_1127._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51","_col56"] <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1129] + SHUFFLE [RS_1127] PartitionCols:_col0 - Select Operator [SEL_1125] (rows=1861800 width=385) + Select Operator [SEL_1123] (rows=1861800 width=385) Output:["_col0","_col1"] - Filter Operator [FIL_1124] (rows=1861800 width=385) + Filter Operator [FIL_1122] (rows=1861800 width=385) predicate:cd_demo_sk is not null TableScan [TS_97] (rows=1861800 width=385) default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_245] - PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1085] (rows=1358167838 width=88) - Conds:RS_242._col0=RS_243._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] + PartitionCols:_col39 + Merge Join Operator [MERGEJOIN_1083] (rows=1358167838 width=88) + Conds:RS_242._col0=RS_243._col18(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col39","_col45","_col46","_col47","_col48","_col51"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_242] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1056] (rows=128840811 width=860) - Conds:RS_112._col1=RS_1128._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] + Merge Join Operator [MERGEJOIN_1054] (rows=128840811 width=860) + Conds:RS_112._col1=RS_1126._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1128] + SHUFFLE [RS_1126] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1125] + Please refer to the previous Select Operator [SEL_1123] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_112] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1055] (rows=117128008 width=860) - Conds:RS_109._col3=RS_1115._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1115] + Merge Join Operator [MERGEJOIN_1053] (rows=117128008 width=860) + Conds:RS_109._col3=RS_1113._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1113] PartitionCols:_col0 - Select Operator [SEL_1114] (rows=40000000 width=1014) + Select Operator [SEL_1112] (rows=40000000 width=1014) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1113] (rows=40000000 width=1014) + Filter Operator [FIL_1111] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_19] (rows=40000000 width=1014) default@customer_address,ad2,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_1054] (rows=106480005 width=860) + Merge Join Operator [MERGEJOIN_1052] (rows=106480005 width=860) Conds:RS_106._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] - <-Reducer 33 [SIMPLE_EDGE] + <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1053] (rows=7920 width=107) - Conds:RS_1109._col1=RS_1112._col0(Inner),Output:["_col0"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1109] + Merge Join Operator [MERGEJOIN_1051] (rows=7920 width=107) + Conds:RS_1107._col1=RS_1110._col0(Inner),Output:["_col0"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1107] PartitionCols:_col1 - Select Operator [SEL_1108] (rows=7200 width=107) + Select Operator [SEL_1106] (rows=7200 width=107) Output:["_col0","_col1"] - Filter Operator [FIL_1107] (rows=7200 width=107) + Filter Operator [FIL_1105] (rows=7200 width=107) predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,hd2,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1112] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1110] PartitionCols:_col0 - Select Operator [SEL_1111] (rows=20 width=12) + Select Operator [SEL_1109] (rows=20 width=12) Output:["_col0"] - Filter Operator [FIL_1110] (rows=20 width=12) + Filter Operator [FIL_1108] (rows=20 width=12) predicate:ib_income_band_sk is not null TableScan [TS_12] (rows=20 width=12) default@income_band,ib2,Tbl:COMPLETE,Col:NONE,Output:["ib_income_band_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_1052] (rows=96800003 width=860) - Conds:RS_103._col4=RS_1098._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] + Merge Join Operator [MERGEJOIN_1050] (rows=96800003 width=860) + Conds:RS_103._col4=RS_1096._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1098] + SHUFFLE [RS_1096] PartitionCols:_col0 - Select Operator [SEL_1094] (rows=73049 width=1119) + Select Operator [SEL_1092] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_1091] (rows=73049 width=1119) + Filter Operator [FIL_1089] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1051] (rows=88000001 width=860) - Conds:RS_1090._col5=RS_1097._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] + Merge Join Operator [MERGEJOIN_1049] (rows=88000001 width=860) + Conds:RS_1088._col5=RS_1095._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1097] + SHUFFLE [RS_1095] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1094] + Please refer to the previous Select Operator [SEL_1092] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1090] + SHUFFLE [RS_1088] PartitionCols:_col5 - Select Operator [SEL_1089] (rows=80000000 width=860) + Select Operator [SEL_1087] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1088] (rows=80000000 width=860) + Filter Operator [FIL_1086] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] - <-Reducer 30 [SIMPLE_EDGE] + <-Reducer 28 [SIMPLE_EDGE] SHUFFLE [RS_243] - PartitionCols:_col16 + PartitionCols:_col18 Select Operator [SEL_223] (rows=1234698008 width=88) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_1084] (rows=1234698008 width=88) - Conds:RS_220._col5, _col12=RS_1190._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 64 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1190] - PartitionCols:_col0, _col1 - Select Operator [SEL_1186] (rows=57591150 width=77) - Output:["_col0","_col1"] - Filter Operator [FIL_1185] (rows=57591150 width=77) - predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_75] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 29 [SIMPLE_EDGE] + Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col18","_col19","_col25","_col26","_col27","_col28","_col31"] + Merge Join Operator [MERGEJOIN_1082] (rows=1234698008 width=88) + Conds:RS_220._col13=RS_1116._col0(Inner),Output:["_col10","_col11","_col17","_col18","_col19","_col20","_col23","_col28","_col29","_col31","_col32","_col33","_col34"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1116] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1112] + <-Reducer 27 [SIMPLE_EDGE] SHUFFLE [RS_220] - PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1083] (rows=1122452711 width=88) - Conds:RS_217._col9=RS_1118._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1118] + PartitionCols:_col13 + Merge Join Operator [MERGEJOIN_1081] (rows=1122452711 width=88) + Conds:RS_217._col14=RS_1220._col0(Inner),Output:["_col10","_col11","_col13","_col17","_col18","_col19","_col20","_col23","_col28","_col29"] + <-Map 66 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1220] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1114] - <-Reducer 28 [SIMPLE_EDGE] + Select Operator [SEL_1217] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1216] (rows=1704 width=1910) + predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) + TableScan [TS_75] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_217] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1082] (rows=1020411534 width=88) - Conds:RS_214._col10=RS_1220._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] - <-Map 61 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1220] + PartitionCols:_col14 + Merge Join Operator [MERGEJOIN_1080] (rows=1020411534 width=88) + Conds:RS_214._col9=RS_1262._col0(Inner),Output:["_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] + <-Reducer 62 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1262] PartitionCols:_col0 - Select Operator [SEL_1217] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1216] (rows=1704 width=1910) - predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) - TableScan [TS_69] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 27 [SIMPLE_EDGE] + Select Operator [SEL_1261] (rows=52798137 width=135) + Output:["_col0"] + Filter Operator [FIL_1260] (rows=52798137 width=135) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1259] (rows=158394413 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 61 [SIMPLE_EDGE] + SHUFFLE [RS_198] + PartitionCols:_col0 + Group By Operator [GBY_197] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_195] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_1078] (rows=316788826 width=135) + Conds:RS_1258._col0, _col1=RS_1173._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 58 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1173] + PartitionCols:_col0, _col1 + Select Operator [SEL_1169] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_1168] (rows=28798881 width=106) + predicate:(cr_item_sk is not null and cr_order_number is not null) + TableScan [TS_62] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 73 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1258] + PartitionCols:_col0, _col1 + Select Operator [SEL_1257] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1256] (rows=287989836 width=135) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_174_item_i_item_sk_min) AND DynamicValue(RS_174_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_174_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_177_store_returns_sr_item_sk_min) AND DynamicValue(RS_177_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_177_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_193_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_193_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_193_catalog_returns_cr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_193_catalog_returns_cr_order_number_min) AND DynamicValue(RS_193_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_193_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_186] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1240] + Group By Operator [GBY_1238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 41 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1142] + Group By Operator [GBY_1140] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1138] (rows=5703 width=1436) + Output:["_col0"] + Select Operator [SEL_1134] (rows=5703 width=1436) + Output:["_col0","_col3"] + Filter Operator [FIL_1133] (rows=5703 width=1436) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) + TableScan [TS_34] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Reducer 49 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1243] + Group By Operator [GBY_1241] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1161] + Group By Operator [GBY_1157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1153] (rows=57591150 width=77) + Output:["_col0"] + Select Operator [SEL_1148] (rows=57591150 width=77) + Output:["_col0","_col1"] + Filter Operator [FIL_1147] (rows=57591150 width=77) + predicate:(sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_37] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 64 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1253] + Group By Operator [GBY_1252] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1182] + Group By Operator [GBY_1178] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1174] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1169] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1255] + Group By Operator [GBY_1254] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1183] + Group By Operator [GBY_1179] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1175] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1169] + <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_214] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1081] (rows=927646829 width=88) - Conds:RS_211._col5=RS_1262._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 57 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1262] - PartitionCols:_col0 - Select Operator [SEL_1261] (rows=52798137 width=135) - Output:["_col0"] - Filter Operator [FIL_1260] (rows=52798137 width=135) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1259] (rows=158394413 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 56 [SIMPLE_EDGE] - SHUFFLE [RS_192] - PartitionCols:_col0 - Group By Operator [GBY_191] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 - Select Operator [SEL_189] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_1079] (rows=316788826 width=135) - Conds:RS_1258._col0, _col1=RS_1170._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1170] - PartitionCols:_col0, _col1 - Select Operator [SEL_1166] (rows=28798881 width=106) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1165] (rows=28798881 width=106) - predicate:(cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_56] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1258] - PartitionCols:_col0, _col1 - Select Operator [SEL_1257] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1256] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_187_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_187_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_187_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_187_catalog_returns_cr_order_number_min) AND DynamicValue(RS_187_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_187_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_180] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1242] - Group By Operator [GBY_1240] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1144] - Group By Operator [GBY_1142] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1140] (rows=5703 width=1436) - Output:["_col0"] - Select Operator [SEL_1136] (rows=5703 width=1436) - Output:["_col0","_col3"] - Filter Operator [FIL_1135] (rows=5703 width=1436) - predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) - TableScan [TS_34] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] - <-Reducer 67 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1255] - Group By Operator [GBY_1253] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1199] - Group By Operator [GBY_1195] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1191] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 59 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1250] - Group By Operator [GBY_1249] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1179] - Group By Operator [GBY_1175] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1171] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 60 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1252] - Group By Operator [GBY_1251] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1180] - Group By Operator [GBY_1176] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1172] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 26 [SIMPLE_EDGE] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_1079] (rows=927646829 width=88) + Conds:RS_211._col0=RS_212._col9(Inner),Output:["_col9","_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] + <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_211] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1080] (rows=843315281 width=88) - Conds:RS_208._col0=RS_209._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_208] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_209] - PartitionCols:_col5 - Select Operator [SEL_179] (rows=766650239 width=88) - Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_1078] (rows=766650239 width=88) - Conds:RS_176._col7=RS_1155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1155] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_1051] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_212] + PartitionCols:_col9 + Select Operator [SEL_185] (rows=843315281 width=88) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col14","_col15","_col16","_col17","_col20"] + Merge Join Operator [MERGEJOIN_1077] (rows=843315281 width=88) + Conds:RS_182._col7=RS_1206._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col9","_col10","_col11","_col12","_col15"] + <-Map 51 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1206] + PartitionCols:_col0 + Select Operator [SEL_1203] (rows=2300 width=1179) + Output:["_col0"] + Filter Operator [FIL_1202] (rows=2300 width=1179) + predicate:p_promo_sk is not null + TableScan [TS_43] (rows=2300 width=1179) + default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_182] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_1076] (rows=766650239 width=88) + Conds:RS_179._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1099] PartitionCols:_col0 - Select Operator [SEL_1152] (rows=2300 width=1179) + Select Operator [SEL_1094] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_1151] (rows=2300 width=1179) - predicate:p_promo_sk is not null - TableScan [TS_40] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_176] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1077] (rows=696954748 width=88) - Conds:RS_173._col0=RS_1101._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1101] - PartitionCols:_col0 - Select Operator [SEL_1096] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_1093] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 44 [SIMPLE_EDGE] - SHUFFLE [RS_173] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1076] (rows=633595212 width=88) - Conds:RS_1278._col1=RS_1139._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 42 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1139] + Filter Operator [FIL_1091] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_179] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1075] (rows=696954748 width=88) + Conds:RS_176._col1, _col8=RS_1152._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] + <-Map 46 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1152] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1148] + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_176] + PartitionCols:_col1, _col8 + Merge Join Operator [MERGEJOIN_1074] (rows=633595212 width=88) + Conds:RS_1276._col1=RS_1137._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 41 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1137] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1136] + Please refer to the previous Select Operator [SEL_1134] <-Map 72 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1278] + SHUFFLE [RS_1276] PartitionCols:_col1 - Select Operator [SEL_1277] (rows=575995635 width=88) + Select Operator [SEL_1275] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1276] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_218_ad1_ca_address_sk_min) AND DynamicValue(RS_218_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_218_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_246_cd1_cd_demo_sk_min) AND DynamicValue(RS_246_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_246_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_208_hd1_hd_demo_sk_min) AND DynamicValue(RS_208_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_208_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_212_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_212_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_212_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_177_promotion_p_promo_sk_min) AND DynamicValue(RS_177_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_177_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_174_d1_d_date_sk_min) AND DynamicValue(RS_174_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_174_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_215_store_s_store_sk_min) AND DynamicValue(RS_215_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_215_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_221_store_returns_sr_ticket_number_min) AND DynamicValue(RS_221_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_221_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + Filter Operator [FIL_1274] (rows=575995635 width=88) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_221_ad1_ca_address_sk_min) AND DynamicValue(RS_221_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_221_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_246_cd1_cd_demo_sk_min) AND DynamicValue(RS_246_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_246_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_211_hd1_hd_demo_sk_min) AND DynamicValue(RS_211_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_211_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_174_item_i_item_sk_min) AND DynamicValue(RS_174_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_174_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_177_store_returns_sr_item_sk_min) AND DynamicValue(RS_177_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_177_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_215_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_215_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_215_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_183_promotion_p_promo_sk_min) AND DynamicValue(RS_183_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_183_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_180_d1_d_date_sk_min) AND DynamicValue(RS_180_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_180_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_218_store_s_store_sk_min) AND DynamicValue(RS_218_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_218_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_177_store_returns_sr_ticket_number_min) AND DynamicValue(RS_177_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_177_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_158] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1148] - Group By Operator [GBY_1145] (rows=1 width=12) + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1146] + Group By Operator [GBY_1143] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1143] - Group By Operator [GBY_1141] (rows=1 width=12) + <-Map 41 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1141] + Group By Operator [GBY_1139] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1138] (rows=5703 width=1436) + Select Operator [SEL_1136] (rows=5703 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1136] + Please refer to the previous Select Operator [SEL_1134] <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1241] - Please refer to the previous Group By Operator [GBY_1240] - <-Reducer 67 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1254] - Please refer to the previous Group By Operator [GBY_1253] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1244] - Group By Operator [GBY_1243] (rows=1 width=12) + BROADCAST [RS_1239] + Please refer to the previous Group By Operator [GBY_1238] + <-Reducer 49 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1242] + Please refer to the previous Group By Operator [GBY_1241] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1247] + Group By Operator [GBY_1246] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1106] - Group By Operator [GBY_1104] (rows=1 width=12) + SHUFFLE [RS_1104] + Group By Operator [GBY_1102] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1102] (rows=36524 width=1119) + Select Operator [SEL_1100] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1096] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1248] - Group By Operator [GBY_1247] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1094] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1251] + Group By Operator [GBY_1250] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_909] - Group By Operator [GBY_908] (rows=1 width=12) + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_916] + Group By Operator [GBY_915] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_907] (rows=7920 width=107) + Select Operator [SEL_914] (rows=7920 width=107) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 39 [BROADCAST_EDGE] vectorized + Please refer to the previous Merge Join Operator [MERGEJOIN_1051] + <-Reducer 37 [BROADCAST_EDGE] vectorized BROADCAST [RS_1271] Group By Operator [GBY_1270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1123] - Group By Operator [GBY_1121] (rows=1 width=12) + <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1121] + Group By Operator [GBY_1119] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1119] (rows=40000000 width=1014) + Select Operator [SEL_1117] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1114] - <-Reducer 48 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1246] - Group By Operator [GBY_1245] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + Please refer to the previous Select Operator [SEL_1112] + <-Reducer 50 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1245] + Group By Operator [GBY_1244] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1160] + PARTITION_ONLY_SHUFFLE [RS_1162] Group By Operator [GBY_1158] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1154] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1148] + <-Reducer 53 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1249] + Group By Operator [GBY_1248] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1211] + Group By Operator [GBY_1209] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1156] (rows=2300 width=1179) + Select Operator [SEL_1207] (rows=2300 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1152] - <-Reducer 58 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_1203] + <-Reducer 63 [BROADCAST_EDGE] vectorized BROADCAST [RS_1267] Group By Operator [GBY_1266] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] - <-Reducer 57 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 62 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1265] Group By Operator [GBY_1264] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] Select Operator [SEL_1263] (rows=52798137 width=135) Output:["_col0"] Please refer to the previous Select Operator [SEL_1261] - <-Reducer 63 [BROADCAST_EDGE] vectorized + <-Reducer 68 [BROADCAST_EDGE] vectorized BROADCAST [RS_1269] Group By Operator [GBY_1268] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 66 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1225] Group By Operator [GBY_1223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_1221] (rows=1704 width=1910) Output:["_col0"] Please refer to the previous Select Operator [SEL_1217] - <-Reducer 68 [BROADCAST_EDGE] vectorized + <-Reducer 71 [BROADCAST_EDGE] vectorized BROADCAST [RS_1273] Group By Operator [GBY_1272] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1200] - Group By Operator [GBY_1196] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1192] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 71 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1275] - Group By Operator [GBY_1274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1134] - Group By Operator [GBY_1132] (rows=1 width=12) + SHUFFLE [RS_1132] + Group By Operator [GBY_1130] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1130] (rows=1861800 width=385) + Select Operator [SEL_1128] (rows=1861800 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1125] + Please refer to the previous Select Operator [SEL_1123] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1239] + SHUFFLE [RS_1237] PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_1238] (rows=746992327 width=88) + Select Operator [SEL_1236] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Group By Operator [GBY_1237] (rows=746992327 width=88) + Group By Operator [GBY_1235] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_124] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Group By Operator [GBY_123] (rows=1493984654 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col49 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col45)","sum(_col46)","sum(_col47)"],keys:_col26, _col48, _col27, _col7, _col9, _col14, _col15, _col16, _col17, _col21, _col22, _col23, _col24, _col51 Select Operator [SEL_122] (rows=1493984654 width=88) - Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] + Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51"] Filter Operator [FIL_121] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1068] (rows=1493984654 width=88) - Conds:RS_118._col37=RS_1126._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] + Merge Join Operator [MERGEJOIN_1066] (rows=1493984654 width=88) + Conds:RS_118._col39=RS_1124._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col45","_col46","_col47","_col48","_col51","_col56"] <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1126] + SHUFFLE [RS_1124] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1125] + Please refer to the previous Select Operator [SEL_1123] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_118] - PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1067] (rows=1358167838 width=88) - Conds:RS_115._col0=RS_116._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] + PartitionCols:_col39 + Merge Join Operator [MERGEJOIN_1065] (rows=1358167838 width=88) + Conds:RS_115._col0=RS_116._col18(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col21","_col22","_col23","_col24","_col26","_col27","_col39","_col45","_col46","_col47","_col48","_col51"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_115] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1056] - <-Reducer 22 [SIMPLE_EDGE] + Please refer to the previous Merge Join Operator [MERGEJOIN_1054] + <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_116] - PartitionCols:_col16 + PartitionCols:_col18 Select Operator [SEL_96] (rows=1234698008 width=88) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_1066] (rows=1234698008 width=88) - Conds:RS_93._col5, _col12=RS_1187._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 64 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1187] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 21 [SIMPLE_EDGE] + Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col18","_col19","_col25","_col26","_col27","_col28","_col31"] + Merge Join Operator [MERGEJOIN_1064] (rows=1234698008 width=88) + Conds:RS_93._col13=RS_1114._col0(Inner),Output:["_col10","_col11","_col17","_col18","_col19","_col20","_col23","_col28","_col29","_col31","_col32","_col33","_col34"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1114] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1112] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_93] - PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1065] (rows=1122452711 width=88) - Conds:RS_90._col9=RS_1116._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1116] + PartitionCols:_col13 + Merge Join Operator [MERGEJOIN_1063] (rows=1122452711 width=88) + Conds:RS_90._col14=RS_1218._col0(Inner),Output:["_col10","_col11","_col13","_col17","_col18","_col19","_col20","_col23","_col28","_col29"] + <-Map 66 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1218] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1114] - <-Reducer 20 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_1217] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_90] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1064] (rows=1020411534 width=88) - Conds:RS_87._col10=RS_1218._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] - <-Map 61 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1218] + PartitionCols:_col14 + Merge Join Operator [MERGEJOIN_1062] (rows=1020411534 width=88) + Conds:RS_87._col9=RS_1194._col0(Inner),Output:["_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] + <-Reducer 56 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1194] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1217] - <-Reducer 19 [SIMPLE_EDGE] + Select Operator [SEL_1193] (rows=52798137 width=135) + Output:["_col0"] + Filter Operator [FIL_1192] (rows=52798137 width=135) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1191] (rows=158394413 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 55 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Group By Operator [GBY_70] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_68] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_1060] (rows=316788826 width=135) + Conds:RS_1190._col0, _col1=RS_1170._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 58 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1170] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1169] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1190] + PartitionCols:_col0, _col1 + Select Operator [SEL_1189] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1188] (rows=287989836 width=135) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_50_store_returns_sr_item_sk_min) AND DynamicValue(RS_50_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_50_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_66_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_66_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_66_catalog_returns_cr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_66_catalog_returns_cr_order_number_min) AND DynamicValue(RS_66_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_66_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_59] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1145] + Please refer to the previous Group By Operator [GBY_1143] + <-Reducer 47 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1165] + Group By Operator [GBY_1163] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1159] + Group By Operator [GBY_1155] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1150] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1148] + <-Reducer 59 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1185] + Group By Operator [GBY_1184] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1180] + Group By Operator [GBY_1176] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1171] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1169] + <-Reducer 60 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1187] + Group By Operator [GBY_1186] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 58 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1181] + Group By Operator [GBY_1177] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1172] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1169] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_87] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1063] (rows=927646829 width=88) - Conds:RS_84._col5=RS_1210._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 51 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1210] - PartitionCols:_col0 - Select Operator [SEL_1209] (rows=52798137 width=135) - Output:["_col0"] - Filter Operator [FIL_1208] (rows=52798137 width=135) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1207] (rows=158394413 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 50 [SIMPLE_EDGE] - SHUFFLE [RS_65] - PartitionCols:_col0 - Group By Operator [GBY_64] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 - Select Operator [SEL_62] (rows=316788826 width=135) - Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_1061] (rows=316788826 width=135) - Conds:RS_1206._col0, _col1=RS_1167._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1167] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1166] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1206] - PartitionCols:_col0, _col1 - Select Operator [SEL_1205] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1204] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_60_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_60_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_60_catalog_returns_cr_order_number_min) AND DynamicValue(RS_60_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_60_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_53] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1147] - Please refer to the previous Group By Operator [GBY_1145] - <-Reducer 65 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1203] - Group By Operator [GBY_1201] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1197] - Group By Operator [GBY_1193] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1188] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 54 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1182] - Group By Operator [GBY_1181] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1177] - Group By Operator [GBY_1173] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1168] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 55 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1184] - Group By Operator [GBY_1183] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1178] - Group By Operator [GBY_1174] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1169] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 18 [SIMPLE_EDGE] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_1061] (rows=927646829 width=88) + Conds:RS_84._col0=RS_85._col9(Inner),Output:["_col9","_col10","_col11","_col13","_col14","_col17","_col18","_col19","_col20","_col23"] + <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_84] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1062] (rows=843315281 width=88) - Conds:RS_81._col0=RS_82._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_81] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_82] - PartitionCols:_col5 - Select Operator [SEL_52] (rows=766650239 width=88) - Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_1060] (rows=766650239 width=88) - Conds:RS_49._col7=RS_1153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1153] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_1051] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col9 + Select Operator [SEL_58] (rows=843315281 width=88) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col14","_col15","_col16","_col17","_col20"] + Merge Join Operator [MERGEJOIN_1059] (rows=843315281 width=88) + Conds:RS_55._col7=RS_1204._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col9","_col10","_col11","_col12","_col15"] + <-Map 51 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1204] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1203] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_1058] (rows=766650239 width=88) + Conds:RS_52._col0=RS_1097._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1097] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1152] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1059] (rows=696954748 width=88) - Conds:RS_46._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1099] - PartitionCols:_col0 - Select Operator [SEL_1095] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_1092] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1058] (rows=633595212 width=88) - Conds:RS_1236._col1=RS_1137._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 42 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1137] + Select Operator [SEL_1093] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_1090] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1057] (rows=696954748 width=88) + Conds:RS_49._col1, _col8=RS_1149._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col15"] + <-Map 46 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1149] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1148] + <-Reducer 39 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col1, _col8 + Merge Join Operator [MERGEJOIN_1056] (rows=633595212 width=88) + Conds:RS_1234._col1=RS_1135._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 41 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1135] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1136] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1236] + Please refer to the previous Select Operator [SEL_1134] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1234] PartitionCols:_col1 - Select Operator [SEL_1235] (rows=575995635 width=88) + Select Operator [SEL_1233] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1234] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_91_ad1_ca_address_sk_min) AND DynamicValue(RS_91_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_91_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_119_cd1_cd_demo_sk_min) AND DynamicValue(RS_119_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_119_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_81_hd1_hd_demo_sk_min) AND DynamicValue(RS_81_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_81_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_85_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_85_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_85_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_50_promotion_p_promo_sk_min) AND DynamicValue(RS_50_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_50_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_47_d1_d_date_sk_min) AND DynamicValue(RS_47_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_47_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_88_store_s_store_sk_min) AND DynamicValue(RS_88_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_88_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_94_store_returns_sr_ticket_number_min) AND DynamicValue(RS_94_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_94_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + Filter Operator [FIL_1232] (rows=575995635 width=88) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_94_ad1_ca_address_sk_min) AND DynamicValue(RS_94_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_94_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_119_cd1_cd_demo_sk_min) AND DynamicValue(RS_119_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_119_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_84_hd1_hd_demo_sk_min) AND DynamicValue(RS_84_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_84_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_50_store_returns_sr_item_sk_min) AND DynamicValue(RS_50_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_50_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_88_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_88_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_88_catalog_sales_cs_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_56_promotion_p_promo_sk_min) AND DynamicValue(RS_56_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_56_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_53_d1_d_date_sk_min) AND DynamicValue(RS_53_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_53_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_91_store_s_store_sk_min) AND DynamicValue(RS_91_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_91_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_50_store_returns_sr_ticket_number_min) AND DynamicValue(RS_50_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_50_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_31] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1146] - Please refer to the previous Group By Operator [GBY_1145] - <-Reducer 65 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1202] - Please refer to the previous Group By Operator [GBY_1201] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1150] - Group By Operator [GBY_1149] (rows=1 width=12) + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1144] + Please refer to the previous Group By Operator [GBY_1143] + <-Reducer 47 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1164] + Please refer to the previous Group By Operator [GBY_1163] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1201] + Group By Operator [GBY_1200] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1105] - Group By Operator [GBY_1103] (rows=1 width=12) + SHUFFLE [RS_1103] + Group By Operator [GBY_1101] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1100] (rows=36524 width=1119) + Select Operator [SEL_1098] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1095] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1164] - Group By Operator [GBY_1163] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1093] + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1215] + Group By Operator [GBY_1214] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_674] - Group By Operator [GBY_673] (rows=1 width=12) + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_695] + Group By Operator [GBY_694] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_672] (rows=7920 width=107) + Select Operator [SEL_693] (rows=7920 width=107) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 38 [BROADCAST_EDGE] vectorized + Please refer to the previous Merge Join Operator [MERGEJOIN_1051] + <-Reducer 36 [BROADCAST_EDGE] vectorized BROADCAST [RS_1229] Group By Operator [GBY_1228] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1122] - Group By Operator [GBY_1120] (rows=1 width=12) + <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1120] + Group By Operator [GBY_1118] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1117] (rows=40000000 width=1014) + Select Operator [SEL_1115] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1114] - <-Reducer 47 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1162] - Group By Operator [GBY_1161] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + Please refer to the previous Select Operator [SEL_1112] + <-Reducer 48 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1167] + Group By Operator [GBY_1166] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1159] - Group By Operator [GBY_1157] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1154] (rows=2300 width=1179) + PARTITION_ONLY_SHUFFLE [RS_1160] + Group By Operator [GBY_1156] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1151] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1152] + Please refer to the previous Select Operator [SEL_1148] <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1215] - Group By Operator [GBY_1214] (rows=1 width=228) + BROADCAST [RS_1213] + Group By Operator [GBY_1212] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1210] + Group By Operator [GBY_1208] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1205] (rows=2300 width=1179) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1203] + <-Reducer 57 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1199] + Group By Operator [GBY_1198] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] - <-Reducer 51 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1213] - Group By Operator [GBY_1212] (rows=1 width=228) + <-Reducer 56 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1197] + Group By Operator [GBY_1196] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] - Select Operator [SEL_1211] (rows=52798137 width=135) + Select Operator [SEL_1195] (rows=52798137 width=135) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1209] - <-Reducer 62 [BROADCAST_EDGE] vectorized + Please refer to the previous Select Operator [SEL_1193] + <-Reducer 67 [BROADCAST_EDGE] vectorized BROADCAST [RS_1227] Group By Operator [GBY_1226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 66 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1224] Group By Operator [GBY_1222] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_1219] (rows=1704 width=1910) Output:["_col0"] Please refer to the previous Select Operator [SEL_1217] - <-Reducer 66 [BROADCAST_EDGE] vectorized + <-Reducer 70 [BROADCAST_EDGE] vectorized BROADCAST [RS_1231] Group By Operator [GBY_1230] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1198] - Group By Operator [GBY_1194] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1189] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 70 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1233] - Group By Operator [GBY_1232] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1133] - Group By Operator [GBY_1131] (rows=1 width=12) + SHUFFLE [RS_1131] + Group By Operator [GBY_1129] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1127] (rows=1861800 width=385) + Select Operator [SEL_1125] (rows=1861800 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1125] + Please refer to the previous Select Operator [SEL_1123] diff --git a/ql/src/test/results/clientpositive/perf/tez/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/query72.q.out index 65a60ea30a..c3b2529134 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query72.q.out @@ -61,263 +61,263 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 10 <- Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 12 <- Map 19 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 24 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 26 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 28 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 29 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Map 12 <- Reducer 10 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) +Reducer 10 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 14 <- Map 20 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 22 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 25 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 27 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 28 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_315] - Limit [LIM_314] (rows=100 width=135) + Reducer 4 vectorized + File Output Operator [FS_312] + Limit [LIM_311] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_313] (rows=37725837 width=135) + Select Operator [SEL_310] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - Group By Operator [GBY_311] (rows=37725837 width=135) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + Group By Operator [GBY_308] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 5 [SIMPLE_EDGE] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_68] (rows=75451675 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col3)","count(_col4)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_66] (rows=75451675 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_251] (rows=75451675 width=135) - Conds:RS_63._col4, _col6=RS_310._col0, _col1(Left Outer),Output:["_col13","_col15","_col22","_col28"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] + Merge Join Operator [MERGEJOIN_248] (rows=75451675 width=135) + Conds:RS_251._col0, _col1=RS_64._col4, _col6(Right Outer),Output:["_col15","_col17","_col24","_col30"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] PartitionCols:_col0, _col1 - Select Operator [SEL_309] (rows=28798881 width=106) + Select Operator [SEL_250] (rows=28798881 width=106) Output:["_col0","_col1"] - Filter Operator [FIL_308] (rows=28798881 width=106) + Filter Operator [FIL_249] (rows=28798881 width=106) predicate:cr_item_sk is not null - TableScan [TS_60] (rows=28798881 width=106) + TableScan [TS_0] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_63] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col4, _col6 - Select Operator [SEL_59] (rows=68592431 width=135) + Select Operator [SEL_62] (rows=68592431 width=135) Output:["_col4","_col6","_col13","_col15","_col22","_col28"] - Merge Join Operator [MERGEJOIN_250] (rows=68592431 width=135) - Conds:RS_56._col0, _col20=RS_307._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col20","_col26"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] - PartitionCols:_col0, _col1 - Select Operator [SEL_306] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_305] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_46] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col20 - Filter Operator [FIL_55] (rows=62356755 width=135) - predicate:(_col3 < _col17) - Merge Join Operator [MERGEJOIN_249] (rows=187070265 width=135) - Conds:RS_52._col1=RS_53._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col20","_col26"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_242] (rows=41342400 width=15) - Conds:RS_254._col2=RS_257._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - PartitionCols:_col2 - Select Operator [SEL_253] (rows=37584000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_252] (rows=37584000 width=15) - predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) - TableScan [TS_0] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] - PartitionCols:_col0 - Select Operator [SEL_256] (rows=27 width=1029) - Output:["_col0","_col1"] - Filter Operator [FIL_255] (rows=27 width=1029) - predicate:w_warehouse_sk is not null - TableScan [TS_3] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col8 - Select Operator [SEL_45] (rows=170063874 width=135) - Output:["_col3","_col8","_col10","_col11","_col14","_col20"] - Filter Operator [FIL_44] (rows=170063874 width=135) - predicate:(UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0D)) - Merge Join Operator [MERGEJOIN_248] (rows=510191624 width=135) - Conds:RS_41._col1=RS_294._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col16","_col18","_col20"] - <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_294] - PartitionCols:_col0 - Select Operator [SEL_293] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_292] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_23] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_247] (rows=463810558 width=135) + Filter Operator [FIL_61] (rows=68592431 width=135) + predicate:(UDFToDouble(_col28) > (UDFToDouble(_col17) + 5.0D)) + Merge Join Operator [MERGEJOIN_247] (rows=205777295 width=135) + Conds:RS_58._col9=RS_294._col0(Inner),Output:["_col5","_col7","_col12","_col14","_col17","_col18","_col24","_col28"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] + PartitionCols:_col0 + Select Operator [SEL_293] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_292] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_45] (rows=73049 width=1119) + default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_246] (rows=187070265 width=135) + Conds:RS_55._col0, _col18=RS_307._col0, _col1(Inner),Output:["_col5","_col7","_col9","_col12","_col14","_col17","_col18","_col24"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + PartitionCols:_col0, _col1 + Select Operator [SEL_306] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_305] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_week_seq is not null) + TableScan [TS_42] (rows=73049 width=1119) + default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0, _col18 + Filter Operator [FIL_54] (rows=170063874 width=135) + predicate:(_col3 < _col15) + Merge Join Operator [MERGEJOIN_245] (rows=510191624 width=135) + Conds:RS_51._col1=RS_52._col6(Inner),Output:["_col0","_col3","_col5","_col7","_col9","_col12","_col14","_col15","_col17","_col18","_col24"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_239] (rows=41342400 width=15) + Conds:RS_254._col2=RS_257._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] + PartitionCols:_col0 + Select Operator [SEL_256] (rows=27 width=1029) + Output:["_col0","_col1"] + Filter Operator [FIL_255] (rows=27 width=1029) + predicate:w_warehouse_sk is not null + TableScan [TS_6] (rows=27 width=1029) + default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] + PartitionCols:_col2 + Select Operator [SEL_253] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_252] (rows=37584000 width=15) + predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) + TableScan [TS_3] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Reducer 17 [ONE_TO_ONE_EDGE] + FORWARD [RS_52] + PartitionCols:_col6 + Select Operator [SEL_41] (rows=463810558 width=135) + Output:["_col1","_col3","_col6","_col8","_col9","_col11","_col12","_col18"] + Merge Join Operator [MERGEJOIN_244] (rows=463810558 width=135) Conds:RS_38._col4=RS_284._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16","_col18"] - <-Map 24 [SIMPLE_EDGE] vectorized + <-Map 25 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_284] PartitionCols:_col0 Select Operator [SEL_283] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_282] (rows=462000 width=1436) predicate:i_item_sk is not null - TableScan [TS_20] (rows=462000 width=1436) + TableScan [TS_23] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] - <-Reducer 14 [SIMPLE_EDGE] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_246] (rows=421645953 width=135) + Merge Join Operator [MERGEJOIN_243] (rows=421645953 width=135) Conds:RS_35._col5=RS_304._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16"] - <-Map 23 [SIMPLE_EDGE] vectorized + <-Map 24 [SIMPLE_EDGE] vectorized SHUFFLE [RS_304] PartitionCols:_col0 Select Operator [SEL_303] (rows=2300 width=1179) Output:["_col0"] - TableScan [TS_18] (rows=2300 width=1179) + TableScan [TS_21] (rows=2300 width=1179) default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_245] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_242] (rows=383314495 width=135) Conds:RS_32._col3=RS_276._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 21 [SIMPLE_EDGE] vectorized + <-Map 22 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_276] PartitionCols:_col0 Select Operator [SEL_275] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_274] (rows=3600 width=107) predicate:((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) - TableScan [TS_15] (rows=7200 width=107) + TableScan [TS_18] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_244] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_241] (rows=348467716 width=135) Conds:RS_29._col2=RS_268._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 19 [SIMPLE_EDGE] vectorized + <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_268] PartitionCols:_col0 Select Operator [SEL_267] (rows=930900 width=385) Output:["_col0"] Filter Operator [FIL_266] (rows=930900 width=385) predicate:((cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_12] (rows=1861800 width=385) + TableScan [TS_15] (rows=1861800 width=385) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_243] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_240] (rows=316788826 width=135) Conds:RS_302._col0=RS_260._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 17 [SIMPLE_EDGE] vectorized + <-Map 18 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_260] PartitionCols:_col0 Select Operator [SEL_259] (rows=36524 width=1119) Output:["_col0","_col1","_col2"] Filter Operator [FIL_258] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) - TableScan [TS_9] (rows=73049 width=1119) + TableScan [TS_12] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_week_seq","d_year"] - <-Map 10 [SIMPLE_EDGE] vectorized + <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_302] PartitionCols:_col0 Select Operator [SEL_301] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Filter Operator [FIL_300] (rows=287989836 width=135) - predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_30_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_30_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_30_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_33_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_33_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_33_household_demographics_hd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_52_inventory_inv_item_sk_min) AND DynamicValue(RS_52_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_52_inventory_inv_item_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_6] (rows=287989836 width=135) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_30_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_30_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_30_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_33_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_33_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_33_household_demographics_hd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_51_inventory_inv_item_sk_min) AND DynamicValue(RS_51_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_51_inventory_inv_item_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_59_d3_d_date_sk_min) AND DynamicValue(RS_59_d3_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_59_d3_d_date_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_9] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] - <-Reducer 18 [BROADCAST_EDGE] vectorized + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_291] + Group By Operator [GBY_290] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=41342400)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_185] + Group By Operator [GBY_184] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=41342400)"] + Select Operator [SEL_183] (rows=41342400 width=15) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_239] + <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_265] Group By Operator [GBY_264] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_263] Group By Operator [GBY_262] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_261] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_259] - <-Reducer 20 [BROADCAST_EDGE] vectorized + <-Reducer 21 [BROADCAST_EDGE] vectorized BROADCAST [RS_273] Group By Operator [GBY_272] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_271] Group By Operator [GBY_270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_269] (rows=930900 width=385) Output:["_col0"] Please refer to the previous Select Operator [SEL_267] - <-Reducer 22 [BROADCAST_EDGE] vectorized + <-Reducer 23 [BROADCAST_EDGE] vectorized BROADCAST [RS_281] Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_279] Group By Operator [GBY_278] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_277] (rows=3600 width=107) Output:["_col0"] Please refer to the previous Select Operator [SEL_275] - <-Reducer 25 [BROADCAST_EDGE] vectorized + <-Reducer 26 [BROADCAST_EDGE] vectorized BROADCAST [RS_289] Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_287] Group By Operator [GBY_286] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_285] (rows=462000 width=1436) Output:["_col0"] Please refer to the previous Select Operator [SEL_283] - <-Reducer 27 [BROADCAST_EDGE] vectorized + <-Reducer 29 [BROADCAST_EDGE] vectorized BROADCAST [RS_299] Group By Operator [GBY_298] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] Group By Operator [GBY_296] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_295] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_293] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_291] - Group By Operator [GBY_290] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=41342400)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_174] - Group By Operator [GBY_173] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=41342400)"] - Select Operator [SEL_172] (rows=41342400 width=15) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_242] diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index bd6f45f051..ea0bffbec8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -167,16 +167,16 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 11 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 18 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -188,142 +188,144 @@ Stage-0 File Output Operator [FS_244] Limit [LIM_243] (rows=100 width=1014) Number of rows:100 - Select Operator [SEL_242] (rows=4436665 width=1014) + Select Operator [SEL_242] (rows=3666666 width=1014) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_241] - Select Operator [SEL_240] (rows=4436665 width=1014) + Select Operator [SEL_240] (rows=3666666 width=1014) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_239] (rows=4436665 width=1014) + Group By Operator [GBY_239] (rows=3666666 width=1014) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=8873331 width=1014) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","count(_col4)","sum(_col16)","count(_col16)","sum(_col15)","count(_col15)"],keys:_col28 - Merge Join Operator [MERGEJOIN_206] (rows=8873331 width=1014) - Conds:RS_44._col13=RS_238._col0(Inner),Output:["_col4","_col15","_col16","_col28"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - PartitionCols:_col0 - Select Operator [SEL_237] (rows=72 width=200) - Output:["_col0","_col1"] - Filter Operator [FIL_236] (rows=72 width=200) - predicate:r_reason_sk is not null - TableScan [TS_21] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col13 - Merge Join Operator [MERGEJOIN_205] (rows=8066665 width=1014) - Conds:RS_41._col2=RS_217._col0(Inner),Output:["_col4","_col13","_col15","_col16"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + Group By Operator [GBY_48] (rows=7333332 width=1014) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col19 + Select Operator [SEL_47] (rows=7333332 width=1014) + Output:["_col6","_col7","_col12","_col19"] + Filter Operator [FIL_46] (rows=7333332 width=1014) + predicate:((((_col27 = 'KY') or (_col27 = 'GA') or (_col27 = 'NM')) and _col14 BETWEEN 100 AND 200) or (((_col27 = 'MT') or (_col27 = 'OR') or (_col27 = 'IN')) and _col14 BETWEEN 150 AND 300) or (((_col27 = 'WI') or (_col27 = 'MO') or (_col27 = 'WV')) and _col14 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_206] (rows=22000000 width=1014) + Conds:RS_43._col2=RS_238._col0(Inner),Output:["_col6","_col7","_col12","_col14","_col19","_col27"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] PartitionCols:_col0 - Select Operator [SEL_216] (rows=4602 width=585) - Output:["_col0"] - Filter Operator [FIL_215] (rows=4602 width=585) - predicate:wp_web_page_sk is not null - TableScan [TS_18] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_41] + Select Operator [SEL_237] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_236] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_21] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col2 - Filter Operator [FIL_40] (rows=7333332 width=1014) - predicate:((((_col24 = 'KY') or (_col24 = 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_204] (rows=22000000 width=1014) - Conds:RS_37._col11=RS_235._col0(Inner),Output:["_col2","_col4","_col6","_col13","_col15","_col16","_col24"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] - PartitionCols:_col0 - Select Operator [SEL_234] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_233] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_15] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col11 - Merge Join Operator [MERGEJOIN_203] (rows=5856506 width=135) - Conds:RS_34._col12, _col18, _col19=RS_231._col0, _col1, _col2(Inner),Output:["_col2","_col4","_col6","_col11","_col13","_col15","_col16"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_230] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_229] (rows=1861800 width=385) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_12] (rows=1861800 width=385) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col12, _col18, _col19 - Filter Operator [FIL_33] (rows=5324097 width=135) - predicate:(((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200)) - Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135) - Conds:RS_30._col10=RS_232._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col11","_col12","_col13","_col15","_col16","_col18","_col19"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_230] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135) - Conds:RS_27._col1, _col3=RS_228._col0, _col5(Inner),Output:["_col2","_col4","_col5","_col6","_col10","_col11","_col12","_col13","_col15","_col16"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - PartitionCols:_col0, _col5 - Select Operator [SEL_227] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_226] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_6] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135) - Conds:RS_225._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0 - Select Operator [SEL_208] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_207] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - PartitionCols:_col0 - Select Operator [SEL_224] (rows=48000888 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_223] (rows=48000888 width=135) - predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_42_web_page_wp_web_page_sk_min) AND DynamicValue(RS_42_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_42_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_214] - Group By Operator [GBY_213] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] - Group By Operator [GBY_211] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_210] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_208] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_222] - Group By Operator [GBY_221] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - Group By Operator [GBY_219] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_218] (rows=4602 width=585) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] + Merge Join Operator [MERGEJOIN_205] (rows=7086373 width=135) + Conds:RS_40._col3, _col21, _col22=RS_234._col0, _col1, _col2(Inner),Output:["_col2","_col6","_col7","_col12","_col14","_col19"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_233] (rows=1861800 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_232] (rows=1861800 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_18] (rows=1861800 width=385) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col3, _col21, _col22 + Filter Operator [FIL_39] (rows=6442158 width=135) + predicate:(((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) + Merge Join Operator [MERGEJOIN_204] (rows=77305913 width=135) + Conds:RS_36._col1=RS_235._col0(Inner),Output:["_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19","_col21","_col22"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_233] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_203] (rows=70278102 width=135) + Conds:RS_33._col4=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_230] (rows=72 width=200) + Output:["_col0","_col1"] + Filter Operator [FIL_229] (rows=72 width=200) + predicate:r_reason_sk is not null + TableScan [TS_12] (rows=72 width=200) + default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135) + Conds:RS_30._col10=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col12","_col13","_col14"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=4602 width=585) + Output:["_col0"] + Filter Operator [FIL_218] (rows=4602 width=585) + predicate:wp_web_page_sk is not null + TableScan [TS_9] (rows=4602 width=585) + default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135) + Conds:RS_27._col8=RS_212._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col10","_col12","_col13","_col14"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + PartitionCols:_col0 + Select Operator [SEL_211] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_210] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col8 + Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135) + Conds:RS_209._col0, _col5=RS_228._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_209] + PartitionCols:_col0, _col5 + Select Operator [SEL_208] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_207] (rows=14398467 width=92) + predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) + TableScan [TS_0] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col1, _col3 + Select Operator [SEL_227] (rows=48000888 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_226] (rows=48000888 width=135) + predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_31_web_page_wp_web_page_sk_min) AND DynamicValue(RS_31_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_31_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_3] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_217] + Group By Operator [GBY_216] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_215] + Group By Operator [GBY_214] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_213] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_211] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + Group By Operator [GBY_222] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_221] (rows=4602 width=585) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_219] diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index 96b489ca59..2e25e6584a 100644 --- a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -409,9 +409,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -428,70 +428,68 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -499,53 +497,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col6 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -571,9 +569,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -590,70 +588,68 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -661,53 +657,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col6 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index faef01c941..17c8b3df95 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -158,32 +158,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -197,31 +197,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 7 Map Operator Tree: @@ -249,38 +249,33 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: int), _col10 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -288,14 +283,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index 85cfd0d8c3..a00dea9ccb 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -162,32 +162,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -201,31 +201,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 7 Map Operator Tree: @@ -253,38 +253,33 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: int), _col10 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -292,14 +287,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out index 0d796ce823..f5d2b8073f 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out @@ -205,6 +205,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: p2 @@ -218,13 +238,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: p3 @@ -244,26 +264,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -290,53 +290,49 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) + 0 _col10 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col18 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out index bda503ac0f..ab389824a0 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out @@ -209,6 +209,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: p2 @@ -222,13 +242,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: p3 @@ -248,26 +268,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -294,53 +294,49 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) + 0 _col10 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col18 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator