diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index fa90750766..f51ff0620c 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -908,7 +908,8 @@ minillaplocal.query.files=\ dynamic_semijoin_reduction_sw2.q,\ partialdhj.q,\ stats_date.q,\ - dst.q + dst.q,\ + q93_with_constraints.q encrypted.query.files=encryption_join_unencrypted_tbl.q,\ encryption_insert_partition_static.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java index 8214cc9a0d..e684432606 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java @@ -27,6 +27,21 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule { + public static final HiveJoinProjectTransposeRule LEFF_PROJECT_BTW_JOIN = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + operand(HiveProject.class, operand(HiveJoin.class, any())), + operand(RelNode.class, any())), + "JoinProjectTransposeRule(Project-Join-Other)", + false, HiveRelFactories.HIVE_BUILDER); + public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_BTW_JOIN = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + operand(RelNode.class, any()), + operand(HiveProject.class, operand(HiveJoin.class, any()))), + "JoinProjectTransposeRule(Other-Project-Join)", + false, HiveRelFactories.HIVE_BUILDER); + public static final HiveJoinProjectTransposeRule BOTH_PROJECT = new HiveJoinProjectTransposeRule( operand(HiveJoin.class, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index d2d5152eff..361f150193 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1795,7 +1795,22 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu calcitePreCboPlan, mdProvider.getMetadataProvider(), executorProvider); } - // 4. Apply join order optimizations: reordering MST algorithm + // Get rid of sq_count_check if group by key is constant + if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + calcitePreCboPlan = + hepPlan(calcitePreCboPlan, false, mdProvider.getMetadataProvider(), null, + HiveRemoveSqCountCheck.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Removing sq_count_check UDF "); + } + // 4.1 Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin. + // Don't run this rule if hive is to remove sq_count_check since that rule expects to have project b/w join. + calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFF_PROJECT_BTW_JOIN, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN); + + // 4.2 Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with // the rest of optimizations if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) { @@ -1884,17 +1899,8 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu HiveRemoveGBYSemiJoinRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removal of gby from semijoin"); - // 9. Get rid of sq_count_check if group by key is constant (HIVE-) - if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = - hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HiveRemoveSqCountCheck.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Removing sq_count_check UDF "); - } - // 10. Run rule to fix windowing issue when it is done over + // 9. Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); @@ -1903,7 +1909,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule"); } - // 11. Apply Druid transformation rules + // 10. Apply Druid transformation rules perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, HepMatchOrder.BOTTOM_UP, @@ -1934,7 +1940,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu JDBCAggregationPushDownRule.INSTANCE, JDBCSortPushDownRule.INSTANCE ); - // 12. Run rules to aid in translation from Calcite tree to Hive tree + // 11. Run rules to aid in translation from Calcite tree to Hive tree if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // 12.1. Merge join into multijoin operators (if possible) @@ -1955,7 +1961,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID, HiveProjectFilterPullUpConstantsRule.INSTANCE); - // 12.2. Introduce exchange operators below join/multijoin operators + // 11.2. Introduce exchange operators below join/multijoin operators calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); @@ -2144,6 +2150,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Rerun PPD"); + return basePlan; } diff --git a/ql/src/test/queries/clientpositive/q93_with_constraints.q b/ql/src/test/queries/clientpositive/q93_with_constraints.q new file mode 100644 index 0000000000..2d46bccb8d --- /dev/null +++ b/ql/src/test/queries/clientpositive/q93_with_constraints.q @@ -0,0 +1,88 @@ +drop table if exists store_sales; +create external table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +alter table store_sales update statistics set ('numRows'='575995635'); + +drop table if exists store_returns; +create external table store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +alter table store_returns update statistics set ('numRows'='57591150'); + +drop table if exists reason; +create external table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +alter table reason update statistics set ('numRows'='72'); + +alter table store_returns add constraint tpcds_pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely; + +explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100; diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index aab38e5768..483139c51c 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -105,14 +105,13 @@ JOIN ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-5 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -168,59 +167,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -255,21 +202,50 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 2 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 2 Data size: 127864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 127864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 127864 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: PREHOOK: query: INSERT OVERWRITE TABLE dest_co1 SELECT b.key, d.val @@ -327,73 +303,97 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2800 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 4 Data size: 120990 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 12 Data size: 375069 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 13 Data size: 412575 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 412575 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 412575 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 Join Operator condition map: Inner Join 0 to 1 @@ -407,57 +407,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 133089 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 8 Data size: 266178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 375069 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 8 Data size: 292795 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 13 Data size: 412575 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col1 (type: string) + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 292795 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 412575 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 292795 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 412575 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co2 - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 133089 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 8 Data size: 266178 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 8 Data size: 292795 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 292795 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 292795 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 Stage: Stage-0 Move Operator @@ -520,28 +492,60 @@ JOIN ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-12, Stage-13, Stage-2 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-3 depends on stages: Stage-0 - Stage-13 has a backup stage: Stage-2 - Stage-8 depends on stages: Stage-13 - Stage-2 - Stage-15 is a root stage - Stage-11 depends on stages: Stage-15 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-14 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:m + $hdt$_0:x + Fetch Operator + limit: -1 + $hdt$_1:y + Fetch Operator + limit: -1 + $hdt$_2:$hdt$_3:m Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:m + $hdt$_0:x + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + $hdt$_1:y + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + $hdt$_2:$hdt$_3:m TableScan alias: m filterExpr: key is not null (type: boolean) @@ -558,7 +562,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-10 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -584,57 +588,28 @@ STAGE PLANS: expressions: _col2 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-9 - Conditional Operator - - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 2 Data size: 127864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 127864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 127864 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -649,146 +624,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: - Stage: Stage-13 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3388 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 - - Stage: Stage-15 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:y - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:y - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 57910 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 63932 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - PREHOOK: query: INSERT OVERWRITE TABLE dest_co3 SELECT b.key, d.val FROM diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index 8a3d79da47..ed1ab26e90 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -23,40 +23,41 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0, Stage-3 Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_1:y Fetch Operator limit: -1 - $hdt$_1:$hdt$_2:x + $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_1:y TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_1:$hdt$_2:x + 2 _col0 (type: string) + $hdt$_2:x TableScan alias: x filterExpr: key is not null (type: boolean) @@ -72,68 +73,59 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out index 1c873e2fe4..37fe85cb44 100644 --- a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out @@ -732,8 +732,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -756,7 +755,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: t1_n127 @@ -776,7 +775,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: t3_n29 @@ -803,38 +802,24 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 935 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 935 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 935 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index cb5f357780..b075ecf2f5 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -1627,33 +1627,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 Map Operator Tree: TableScan alias: x @@ -1686,36 +1665,37 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1731,7 +1711,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1746,7 +1726,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1810,33 +1790,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 Map Operator Tree: TableScan alias: x @@ -1869,36 +1828,37 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1914,7 +1874,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1929,7 +1889,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out index c9bff2895c..a664e02ae9 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out @@ -676,11 +676,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -708,7 +707,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: x @@ -729,7 +728,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: x @@ -755,34 +754,15 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 2 _col0 (type: string) + outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col2) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) + expressions: hash(_col3) (type: int), hash(_col4) (type: int), hash(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -794,7 +774,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -809,7 +789,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -832,7 +812,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -896,11 +876,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -928,7 +907,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: x @@ -949,7 +928,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: x @@ -975,34 +954,15 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 2 _col0 (type: string) + outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col2) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) + expressions: hash(_col3) (type: int), hash(_col4) (type: int), hash(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1014,7 +974,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1029,7 +989,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1052,7 +1012,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1138,40 +1098,29 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 2 _col0 (type: string) + outputColumnNames: _col2, _col3, _col4 input vertices: 1 Map 3 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + 2 Reducer 5 + Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col2) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + expressions: hash(_col3) (type: int), hash(_col4) (type: int), hash(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out index 605686591f..e85fd5ed51 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out @@ -148,7 +148,7 @@ POSTHOOK: Output: default@srcpart_small_n2 POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[93][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[92][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: EXPLAIN SELECT count(*) FROM ( @@ -212,10 +212,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_n6 - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean) + predicate: ((key BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -245,10 +245,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_int_n0 - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_n6_key_min) AND DynamicValue(RS_9_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_n6_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_23_srcpart_date_n6_key_min) AND DynamicValue(RS_23_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_n6_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((cstring BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_n6_key_min) AND DynamicValue(RS_9_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_n6_key_bloom_filter))) and cstring is not null) (type: boolean) + predicate: ((cstring BETWEEN DynamicValue(RS_23_srcpart_date_n6_key_min) AND DynamicValue(RS_23_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_n6_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter))) and cstring is not null) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -265,10 +265,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_n6 - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_19_srcpart_small_n2_key1_min) AND DynamicValue(RS_19_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_19_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean) + predicate: ((key BETWEEN DynamicValue(RS_19_srcpart_small_n2_key1_min) AND DynamicValue(RS_19_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_19_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -298,10 +298,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_int_n0 - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_23_srcpart_date_n6_key_min) AND DynamicValue(RS_23_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_n6_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_18_srcpart_date_n6_key_min) AND DynamicValue(RS_18_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_18_srcpart_date_n6_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_19_srcpart_small_n2_key1_min) AND DynamicValue(RS_19_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_19_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((cstring BETWEEN DynamicValue(RS_23_srcpart_date_n6_key_min) AND DynamicValue(RS_23_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_23_srcpart_date_n6_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter))) and cstring is not null) (type: boolean) + predicate: ((cstring BETWEEN DynamicValue(RS_18_srcpart_date_n6_key_min) AND DynamicValue(RS_18_srcpart_date_n6_key_max) and in_bloom_filter(cstring, DynamicValue(RS_18_srcpart_date_n6_key_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_19_srcpart_small_n2_key1_min) AND DynamicValue(RS_19_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_19_srcpart_small_n2_key1_bloom_filter))) and cstring is not null) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index f1b4fb2ed3..70ea9aca71 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -1205,17 +1205,16 @@ SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `t5`.`key`, `t0`.`value`, `t5`.`value` AS `value1` +OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t4`.`value` AS `value1` FROM (SELECT `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `t4`.`key`, `t4`.`value` -FROM (SELECT `key` +INNER JOIN ((SELECT `key` FROM `default`.`src` WHERE `key` IS NOT NULL) AS `t2` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) AS `t5` ON `t0`.`value` = `t5`.`value` +WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1227,91 +1226,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: z - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Map 4 Map Operator Tree: TableScan alias: y @@ -1329,28 +1249,24 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 5 => 25 + Estimated key counts: Map 4 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 5 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col0 (type: string) - auto parallelism: true + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -1406,7 +1322,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 5 + Map 4 Map Operator Tree: TableScan alias: x @@ -1485,6 +1401,85 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Map 5 + Map Operator Tree: + TableScan + alias: z + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] Reducer 2 Execution mode: llap Needs Tagging: false @@ -1493,13 +1488,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - Position of Big Table: 0 + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Position of Big Table: 1 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1753,16 +1748,15 @@ SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t4`.`value` AS `value1` +OPTIMIZED SQL: SELECT `t2`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `t3`.`key`, `t3`.`value` -FROM (SELECT `key` -FROM `default`.`src`) AS `t1` -RIGHT JOIN (SELECT `key`, `value` +INNER JOIN ((SELECT `key`, `value` FROM `default`.`src1` -WHERE `value` IS NOT NULL) AS `t3` ON `t1`.`key` = `t3`.`key`) AS `t4` ON `t0`.`value` = `t4`.`value` +WHERE `value` IS NOT NULL) AS `t2` +LEFT JOIN (SELECT `key` +FROM `default`.`src`) AS `t3` ON `t2`.`key` = `t3`.`key`) ON `t0`.`value` = `t2`.`value` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1774,33 +1768,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: z + alias: x filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 + value expressions: _col1 (type: string) auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs @@ -1809,36 +1804,33 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1846,19 +1838,21 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Map 4 + /src1 [x] + Map 5 Map Operator Tree: TableScan alias: y @@ -1874,7 +1868,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 + tag: 1 auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs @@ -1934,26 +1928,25 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: x + alias: z filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: _col1 (type: string) auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs @@ -1962,33 +1955,36 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1996,35 +1992,55 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] + /srcpart/ds=2008-04-08/hr=11 [z] Reducer 2 Execution mode: llap Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - Position of Big Table: 1 + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Position of Big Table: 0 + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 0 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2076,7 +2092,7 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) auto parallelism: false - Reducer 3 + Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -2107,32 +2123,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Position of Big Table: 1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col0 (type: string) - auto parallelism: true Stage: Stage-2 Dependency Collection @@ -2327,32 +2317,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 Map Operator Tree: TableScan alias: y @@ -2373,21 +2343,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: x @@ -2408,6 +2374,26 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: x + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2415,12 +2401,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2620,32 +2606,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: y - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 Map Operator Tree: TableScan alias: y @@ -2666,21 +2632,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: x @@ -2701,6 +2663,26 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2708,12 +2690,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out b/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out index 33da686fe3..8ac21ff54c 100644 --- a/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out @@ -644,15 +644,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col7 (type: int), _col0 (type: smallint), _col1 (type: string), _col2 (type: smallint), _col3 (type: string), _col5 (type: smallint), _col6 (type: string), _col9 (type: smallint), _col10 (type: smallint), _col12 (type: string), _col13 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + expressions: _col7 (type: int), _col0 (type: smallint), _col1 (type: string), _col2 (type: smallint), _col3 (type: string), _col5 (type: smallint), _col6 (type: string), _col9 (type: smallint), _col10 (type: smallint), _col11 (type: string), _col12 (type: string), _col13 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string) + value expressions: _col2 (type: smallint), _col3 (type: string), _col4 (type: smallint), _col5 (type: string), _col7 (type: smallint), _col8 (type: string), _col9 (type: smallint), _col10 (type: smallint), _col11 (type: string), _col12 (type: string), _col13 (type: string) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -662,22 +662,26 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15 Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col12 is null (type: boolean) + Select Operator + expressions: _col0 (type: int), _col2 (type: smallint), _col3 (type: string), _col4 (type: smallint), _col5 (type: string), _col7 (type: smallint), _col8 (type: string), _col9 (type: smallint), _col10 (type: smallint), _col12 (type: string), _col13 (type: string), _col11 (type: string), _col15 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string), '201611160940' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Filter Operator + predicate: _col12 is null (type: boolean) + Statistics: Num rows: 1 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string), '201611160940' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out index 261e153aba..96ada7897e 100644 --- a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out @@ -211,28 +211,24 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -377,28 +373,24 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -545,32 +537,28 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -853,61 +841,53 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 4 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: - 1 Map 5 - Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE + 1 Map 6 + Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - input vertices: - 1 Map 6 - Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -1193,61 +1173,53 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 4 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: - 1 Map 5 - Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE + 1 Map 6 + Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - input vertices: - 1 Map 6 - Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -1626,16 +1598,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1643,12 +1611,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col2 (type: string) 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1668,16 +1636,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -1690,7 +1654,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1997,16 +1961,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2014,12 +1974,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col2 (type: string) 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -2039,16 +1999,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -2061,7 +2017,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out b/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out new file mode 100644 index 0000000000..8c2eb2224f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out @@ -0,0 +1,373 @@ +PREHOOK: query: drop table if exists store_sales +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists store_sales +POSTHOOK: type: DROPTABLE +PREHOOK: query: create external table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create external table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: alter table store_sales update statistics set ('numRows'='575995635') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales +POSTHOOK: query: alter table store_sales update statistics set ('numRows'='575995635') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales +PREHOOK: query: drop table if exists store_returns +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists store_returns +POSTHOOK: type: DROPTABLE +PREHOOK: query: create external table store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_returns +POSTHOOK: query: create external table store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_returns +PREHOOK: query: alter table store_returns update statistics set ('numRows'='57591150') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@store_returns +PREHOOK: Output: default@store_returns +POSTHOOK: query: alter table store_returns update statistics set ('numRows'='57591150') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: default@store_returns +PREHOOK: query: drop table if exists reason +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists reason +POSTHOOK: type: DROPTABLE +PREHOOK: query: create external table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@reason +POSTHOOK: query: create external table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@reason +PREHOOK: query: alter table reason update statistics set ('numRows'='72') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@reason +PREHOOK: Output: default@reason +POSTHOOK: query: alter table reason update statistics set ('numRows'='72') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@reason +POSTHOOK: Output: default@reason +PREHOOK: query: alter table store_returns add constraint tpcds_pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_returns add constraint tpcds_pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: sr_reason_sk is not null (type: boolean) + Statistics: Num rows: 57591150 Data size: 875385504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: sr_reason_sk is not null (type: boolean) + Statistics: Num rows: 54711593 Data size: 831616236 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_reason_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 54711593 Data size: 831616236 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 54711593 Data size: 831616236 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: reason + filterExpr: ((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) (type: boolean) + Statistics: Num rows: 72 Data size: 13160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 365 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: r_reason_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 365 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 365 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 70041069312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col7, _col9, _col10 + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), CASE WHEN (_col3 is not null) THEN ((CAST( (_col9 - _col3) AS decimal(10,0)) * _col10)) ELSE ((CAST( _col9 AS decimal(10,0)) * _col10)) END (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(28,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 285117845 Data size: 34670330027 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: decimal(28,2)), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 285117845 Data size: 34670330027 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: decimal(28,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 285117845 Data size: 34670330027 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 12100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 12100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 48fb6794ea..f7c8ec7e31 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1888,7 +1888,7 @@ POSTHOOK: Input: default@part_null_n0 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) @@ -1984,14 +1984,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 residual filter predicates: {(_col5 >= _col9)} Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + sort order: Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2001,8 +1997,8 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - residual filter predicates: {(_col7 <= _col9)} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 + residual filter predicates: {(_col7 <= _col11)} Statistics: Num rows: 3 Data size: 1947 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -2053,7 +2049,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5071,7 +5067,7 @@ POSTHOOK: Input: default@emps_n4 110 John 40 M Vancouver 2 NULL false true 2002-05-03 120 Wilma 20 F NULL 1 5 NULL true 2005-09-07 130 Alice 40 F Vancouver 2 NULL false true 2007-01-01 -Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from emps_n4 where deptno <> (select sum(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3) PREHOOK: type: QUERY POSTHOOK: query: explain select * from emps_n4 where deptno <> (select sum(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3) @@ -5167,14 +5163,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 residual filter predicates: {(UDFToLong(_col2) <> _col10)} Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 3476 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -5184,8 +5176,8 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - residual filter predicates: {(UDFToLong(_col0) > _col10)} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12 + residual filter predicates: {(UDFToLong(_col0) > _col12)} Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) diff --git a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out index a362411d41..ca0dace190 100644 --- a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out @@ -42,7 +42,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tez_self_join2 POSTHOOK: Lineage: tez_self_join2.id1 SCRIPT [] -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select s.id2, s.id3 from @@ -74,8 +74,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (XPROD_EDGE), Map 5 (XPROD_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -98,7 +98,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: self1 @@ -112,9 +112,11 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -142,20 +144,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2 + outputColumnNames: _col3 Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'ab' (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string) + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -164,18 +159,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col3 + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col2 (type: string) + expressions: 'ab' (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -183,7 +179,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select s.id2, s.id3 from ( diff --git a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index 1bfe5af603..9e17e94147 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -22,49 +22,45 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: v2 - filterExpr: ctinyint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + alias: v3 + filterExpr: csmallint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE + predicate: csmallint is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint) + expressions: csmallint (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col2, _col3 + 0 _col0 (type: smallint) + 1 _col1 (type: smallint) + outputColumnNames: _col1, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 483205 Data size: 5777652 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: smallint), _col3 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 483205 Data size: 5777652 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 782372 Data size: 6245104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 11087 Data size: 112232 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 782315 Data size: 6244648 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) + value expressions: _col0 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -90,11 +86,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: tinyint) + key expressions: _col1 (type: smallint) sort order: + - Map-reduce partition columns: _col0 (type: tinyint) + Map-reduce partition columns: _col1 (type: smallint) Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: smallint), _col2 (type: double) + value expressions: _col0 (type: tinyint), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -109,21 +105,21 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: v3 - filterExpr: csmallint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + alias: v2 + filterExpr: ctinyint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: csmallint (type: smallint) + expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: smallint) + key expressions: _col0 (type: tinyint) sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 9174 Data size: 27400 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 9173 Data size: 27396 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -171,4 +167,4 @@ POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -6.065190932486892E11 +6.06519093248863E11 diff --git a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out index 6116d382e8..7f754a662d 100644 --- a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -13,38 +13,39 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-4 depends on stages: Stage-6 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_1:y Fetch Operator limit: -1 - $hdt$_1:$hdt$_2:x + $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_1:y TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_1:$hdt$_2:x + 2 _col0 (type: string) + $hdt$_2:x TableScan alias: x filterExpr: key is not null (type: boolean) @@ -60,52 +61,43 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -260,38 +252,39 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-4 depends on stages: Stage-6 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_1:y Fetch Operator limit: -1 - $hdt$_1:$hdt$_2:x + $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_1:y TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_1:$hdt$_2:x + 2 _col0 (type: string) + $hdt$_2:x TableScan alias: x filterExpr: key is not null (type: boolean) @@ -307,52 +300,43 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out b/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out index 50e7ce0f3c..89aac8f58a 100644 --- a/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out @@ -79,22 +79,22 @@ FROM JOIN z ON (subq.key1 = z.id) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:y + $hdt$_0:y Fetch Operator limit: -1 - $hdt$_1:z + $hdt$_2:z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:y + $hdt$_0:y TableScan alias: y filterExpr: id is not null (type: boolean) @@ -110,7 +110,8 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - $hdt$_1:z + 2 _col0 (type: int) + $hdt$_2:z TableScan alias: z filterExpr: id is not null (type: boolean) @@ -125,9 +126,10 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col1 (type: int) + 2 _col0 (type: int) - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -144,30 +146,24 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 66 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: int), _col2 (type: string), _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col3 (type: int), _col2 (type: string), _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/perf/spark/query2.q.out b/ql/src/test/results/clientpositive/perf/spark/query2.q.out index 105f2ee1ae..f6156613e6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query2.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query2.q.out @@ -124,14 +124,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 459), Map 14 (PARTITION-LEVEL SORT, 459), Map 15 (PARTITION-LEVEL SORT, 459) - Reducer 12 <- Reducer 11 (GROUP, 504) - Reducer 13 <- Map 16 (PARTITION-LEVEL SORT, 253), Reducer 12 (PARTITION-LEVEL SORT, 253) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 459), Map 7 (PARTITION-LEVEL SORT, 459), Map 8 (PARTITION-LEVEL SORT, 459) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 459), Map 14 (PARTITION-LEVEL SORT, 459), Map 9 (PARTITION-LEVEL SORT, 459) + Reducer 11 <- Reducer 10 (GROUP, 504) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 253), Reducer 11 (PARTITION-LEVEL SORT, 253) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 459), Map 6 (PARTITION-LEVEL SORT, 459), Map 7 (PARTITION-LEVEL SORT, 459) Reducer 3 <- Reducer 2 (GROUP, 504) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 253), Reducer 3 (PARTITION-LEVEL SORT, 253) - Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 554), Reducer 4 (PARTITION-LEVEL SORT, 554) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 529), Reducer 12 (PARTITION-LEVEL SORT, 529), Reducer 3 (PARTITION-LEVEL SORT, 529) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -154,27 +153,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: web_sales - filterExpr: ws_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ws_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) - Execution mode: vectorized - Map 14 + Map 13 Map Operator Tree: TableScan alias: catalog_sales @@ -194,7 +173,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized - Map 15 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -214,7 +193,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string) Execution mode: vectorized - Map 16 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -233,7 +212,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: catalog_sales @@ -253,7 +232,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -273,7 +252,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string) Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -292,7 +271,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: web_sales + filterExpr: ws_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ws_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(7,2)) + Execution mode: vectorized + Reducer 10 Reduce Operator Tree: Join Operator condition map: @@ -318,7 +317,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) - Reducer 12 + Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -333,7 +332,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 237595882 Data size: 32218894638 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) - Reducer 13 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -395,46 +394,32 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 261355475 Data size: 35440784869 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 261355475 Data size: 35440784869 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 (_col0 - 53) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE + 2 (_col0 - 53) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 574982057 Data size: 77969728401 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), round((_col1 / _col9), 2) (type: decimal(20,2)), round((_col2 / _col10), 2) (type: decimal(20,2)), round((_col3 / _col11), 2) (type: decimal(20,2)), round((_col4 / _col12), 2) (type: decimal(20,2)), round((_col5 / _col13), 2) (type: decimal(20,2)), round((_col6 / _col14), 2) (type: decimal(20,2)), round((_col7 / _col15), 2) (type: decimal(20,2)) + expressions: _col0 (type: int), round((_col1 / _col11), 2) (type: decimal(20,2)), round((_col2 / _col12), 2) (type: decimal(20,2)), round((_col3 / _col13), 2) (type: decimal(20,2)), round((_col4 / _col14), 2) (type: decimal(20,2)), round((_col5 / _col15), 2) (type: decimal(20,2)), round((_col6 / _col16), 2) (type: decimal(20,2)), round((_col7 / _col17), 2) (type: decimal(20,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574982057 Data size: 77969728401 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574982057 Data size: 77969728401 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(20,2)), _col2 (type: decimal(20,2)), _col3 (type: decimal(20,2)), _col4 (type: decimal(20,2)), _col5 (type: decimal(20,2)), _col6 (type: decimal(20,2)), _col7 (type: decimal(20,2)) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(20,2)), VALUE._col1 (type: decimal(20,2)), VALUE._col2 (type: decimal(20,2)), VALUE._col3 (type: decimal(20,2)), VALUE._col4 (type: decimal(20,2)), VALUE._col5 (type: decimal(20,2)), VALUE._col6 (type: decimal(20,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574982057 Data size: 77969728401 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 287491028 Data size: 38984864200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 574982057 Data size: 77969728401 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/spark/query59.q.out b/ql/src/test/results/clientpositive/perf/spark/query59.q.out index 73ea161fc4..1224ab68a6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query59.q.out @@ -408,28 +408,24 @@ STAGE PLANS: input vertices: 1 Map 9 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col13 (type: string), _col0 (type: int), _col12 (type: string), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Reduce Output Operator + key expressions: _col12 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col12 (type: string), _col0 (type: int) Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: int) - Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col13 (type: string) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string), _col1 (type: int) + 0 _col12 (type: string), _col0 (type: int) 1 _col1 (type: string), (_col0 - 52) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12, _col13, _col14, _col15, _col16, _col17 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col1 (type: int), (_col3 / _col12) (type: decimal(37,20)), (_col4 / _col13) (type: decimal(37,20)), (_col5 / _col5) (type: decimal(37,20)), (_col6 / _col14) (type: decimal(37,20)), (_col7 / _col15) (type: decimal(37,20)), (_col8 / _col16) (type: decimal(37,20)), (_col9 / _col17) (type: decimal(37,20)) + expressions: _col13 (type: string), _col12 (type: string), _col0 (type: int), (_col2 / _col16) (type: decimal(37,20)), (_col3 / _col17) (type: decimal(37,20)), (_col4 / _col4) (type: decimal(37,20)), (_col5 / _col18) (type: decimal(37,20)), (_col6 / _col19) (type: decimal(37,20)), (_col7 / _col20) (type: decimal(37,20)), (_col8 / _col21) (type: decimal(37,20)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query95.q.out b/ql/src/test/results/clientpositive/perf/spark/query95.q.out index 14636717af..69dcfabd52 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query95.q.out @@ -255,12 +255,12 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: wr_order_number (type: int) - outputColumnNames: _col0 + outputColumnNames: _col13 Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col13 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col13 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 8 @@ -351,11 +351,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 + 1 _col13 (type: int) + outputColumnNames: _col14 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col1 (type: int) + keys: _col14 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/tez/query2.q.out b/ql/src/test/results/clientpositive/perf/tez/query2.q.out index 5f908948aa..e2021f9b5f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query2.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query2.q.out @@ -119,167 +119,161 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 16 <- Reducer 14 (BROADCAST_EDGE), Union 17 (CONTAINS) -Map 18 <- Reducer 14 (BROADCAST_EDGE), Union 17 (CONTAINS) -Map 8 <- Reducer 10 (BROADCAST_EDGE), Union 2 (CONTAINS) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 12 (ONE_TO_ONE_EDGE) -Reducer 14 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 15 <- Reducer 13 (BROADCAST_EDGE), Union 16 (CONTAINS) +Map 17 <- Reducer 13 (BROADCAST_EDGE), Union 16 (CONTAINS) +Map 7 <- Reducer 9 (BROADCAST_EDGE), Union 2 (CONTAINS) +Reducer 10 <- Map 8 (SIMPLE_EDGE), Union 16 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (ONE_TO_ONE_EDGE) +Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) -Reducer 6 <- Reducer 13 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_195] - Select Operator [SEL_194] (rows=287491028 width=135) + Reducer 6 vectorized + File Output Operator [FS_189] + Select Operator [SEL_188] (rows=574982057 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_58] - Select Operator [SEL_57] (rows=287491028 width=135) + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_55] + Select Operator [SEL_54] (rows=574982057 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_147] (rows=287491028 width=135) - Conds:RS_54._col0=RS_55.(_col0 - 53)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_55] + Merge Join Operator [MERGEJOIN_141] (rows=574982057 width=135) + Conds:RS_179._col0=RS_186._col0(Inner),RS_179._col0=RS_52.(_col0 - 53)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_182] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_week_seq is not null) + TableScan [TS_20] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_week_seq","d_year"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_52] PartitionCols:(_col0 - 53) - Merge Join Operator [MERGEJOIN_146] (rows=261355475 width=135) - Conds:RS_193._col0=RS_191._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + Merge Join Operator [MERGEJOIN_140] (rows=261355475 width=135) + Conds:RS_181._col0=RS_187._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] PartitionCols:_col0 - Select Operator [SEL_189] (rows=36524 width=1119) + Select Operator [SEL_185] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_187] (rows=36524 width=1119) + Filter Operator [FIL_183] (rows=36524 width=1119) predicate:((d_year = 2002) and d_week_seq is not null) - TableScan [TS_20] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_week_seq","d_year"] - <-Reducer 12 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_193] + Please refer to the previous TableScan [TS_20] + <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_181] PartitionCols:_col0 - Group By Operator [GBY_192] (rows=237595882 width=135) + Group By Operator [GBY_180] (rows=237595882 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_44] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col0 - Group By Operator [GBY_43] (rows=475191764 width=135) + Group By Operator [GBY_39] (rows=475191764 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 - Select Operator [SEL_41] (rows=475191764 width=135) + Select Operator [SEL_37] (rows=475191764 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_145] (rows=475191764 width=135) - Conds:Union 17._col0=RS_172._col0(Inner),Output:["_col1","_col3","_col4"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] + Merge Join Operator [MERGEJOIN_139] (rows=475191764 width=135) + Conds:Union 16._col0=RS_166._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_166] PartitionCols:_col0 - Select Operator [SEL_169] (rows=73049 width=1119) + Select Operator [SEL_163] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_168] (rows=73049 width=1119) + Filter Operator [FIL_162] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_week_seq is not null) TableScan [TS_8] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq","d_day_name"] - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_204] + <-Union 16 [SIMPLE_EDGE] + <-Map 15 [CONTAINS] vectorized + Reduce Output Operator [RS_198] PartitionCols:_col0 - Select Operator [SEL_203] (rows=144002668 width=135) + Select Operator [SEL_197] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_202] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_158] (rows=144002668 width=135) + Filter Operator [FIL_196] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_152] (rows=144002668 width=135) Output:["ws_sold_date_sk","ws_ext_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_200] - Group By Operator [GBY_199] (rows=1 width=12) + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_194] + Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_177] - Group By Operator [GBY_175] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_171] + Group By Operator [GBY_169] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_173] (rows=73049 width=1119) + Select Operator [SEL_167] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_169] - <-Map 18 [CONTAINS] vectorized - Reduce Output Operator [RS_207] + Please refer to the previous Select Operator [SEL_163] + <-Map 17 [CONTAINS] vectorized + Reduce Output Operator [RS_201] PartitionCols:_col0 - Select Operator [SEL_206] (rows=287989836 width=135) + Select Operator [SEL_200] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_205] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_163] (rows=287989836 width=135) + Filter Operator [FIL_199] (rows=287989836 width=135) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_157] (rows=287989836 width=135) Output:["cs_sold_date_sk","cs_ext_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_201] - Please refer to the previous Group By Operator [GBY_199] - <-Reducer 5 [ONE_TO_ONE_EDGE] - FORWARD [RS_54] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_195] + Please refer to the previous Group By Operator [GBY_193] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_179] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_144] (rows=261355475 width=135) - Conds:RS_185._col0=RS_190._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] - PartitionCols:_col0 - Select Operator [SEL_188] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_186] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_week_seq is not null) - Please refer to the previous TableScan [TS_20] - <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_185] + Group By Operator [GBY_178] (rows=237595882 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_184] (rows=237595882 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=475191764 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 - Select Operator [SEL_14] (rows=475191764 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_143] (rows=475191764 width=135) - Conds:Union 2._col0=RS_170._col0(Inner),Output:["_col1","_col3","_col4"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_169] - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_183] - PartitionCols:_col0 - Select Operator [SEL_182] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_181] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_148] (rows=144002668 width=135) - Output:["ws_sold_date_sk","ws_ext_sales_price"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_179] - Group By Operator [GBY_178] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_176] - Group By Operator [GBY_174] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_171] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_169] - <-Map 8 [CONTAINS] vectorized - Reduce Output Operator [RS_198] - PartitionCols:_col0 - Select Operator [SEL_197] (rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_196] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_153] (rows=287989836 width=135) - Output:["cs_sold_date_sk","cs_ext_sales_price"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_180] - Please refer to the previous Group By Operator [GBY_178] + Group By Operator [GBY_16] (rows=475191764 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 + Select Operator [SEL_14] (rows=475191764 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_138] (rows=475191764 width=135) + Conds:Union 2._col0=RS_164._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_164] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_163] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + Reduce Output Operator [RS_177] + PartitionCols:_col0 + Select Operator [SEL_176] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_175] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_142] (rows=144002668 width=135) + Output:["ws_sold_date_sk","ws_ext_sales_price"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_173] + Group By Operator [GBY_172] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_170] + Group By Operator [GBY_168] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_165] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_163] + <-Map 7 [CONTAINS] vectorized + Reduce Output Operator [RS_192] + PartitionCols:_col0 + Select Operator [SEL_191] (rows=287989836 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_190] (rows=287989836 width=135) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_147] (rows=287989836 width=135) + Output:["cs_sold_date_sk","cs_ext_sales_price"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_174] + Please refer to the previous Group By Operator [GBY_172] diff --git a/ql/src/test/results/clientpositive/perf/tez/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/query59.q.out index 29cf1366d8..a4be291b1b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query59.q.out @@ -109,177 +109,175 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_235] - Limit [LIM_234] (rows=100 width=88) + File Output Operator [FS_234] + Limit [LIM_233] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_233] (rows=421657640 width=88) + Select Operator [SEL_232] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_60] - Select Operator [SEL_59] (rows=421657640 width=88) + SHUFFLE [RS_59] + Select Operator [SEL_58] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Merge Join Operator [MERGEJOIN_186] (rows=421657640 width=88) - Conds:RS_56._col2, _col1=RS_57._col1, (_col0 - 52)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col12","_col13","_col14","_col15","_col16","_col17"] + Merge Join Operator [MERGEJOIN_185] (rows=421657640 width=88) + Conds:RS_55._col12, _col0=RS_56._col1, (_col0 - 52)(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13","_col16","_col17","_col18","_col19","_col20","_col21"] <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_57] + SHUFFLE [RS_56] PartitionCols:_col1, (_col0 - 52) - Select Operator [SEL_55] (rows=383325119 width=88) + Select Operator [SEL_48] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_185] (rows=383325119 width=88) - Conds:RS_52._col1=RS_222._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col11"] + Merge Join Operator [MERGEJOIN_184] (rows=383325119 width=88) + Conds:RS_45._col1=RS_221._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col11"] <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_222] + PARTITION_ONLY_SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_221] (rows=1704 width=1910) + Select Operator [SEL_220] (rows=1704 width=1910) Output:["_col0","_col1"] - Filter Operator [FIL_220] (rows=1704 width=1910) + Filter Operator [FIL_219] (rows=1704 width=1910) predicate:(s_store_id is not null and s_store_sk is not null) - TableScan [TS_46] (rows=1704 width=1910) + TableScan [TS_39] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id"] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_52] + SHUFFLE [RS_45] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_184] (rows=348477374 width=88) - Conds:RS_232._col0=RS_217._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_183] (rows=348477374 width=88) + Conds:RS_231._col0=RS_216._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + SHUFFLE [RS_216] PartitionCols:_col1 - Select Operator [SEL_215] (rows=8116 width=1119) + Select Operator [SEL_214] (rows=8116 width=1119) Output:["_col1"] - Filter Operator [FIL_213] (rows=8116 width=1119) + Filter Operator [FIL_212] (rows=8116 width=1119) predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) TableScan [TS_15] (rows=73049 width=1119) default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_week_seq"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] + SHUFFLE [RS_231] PartitionCols:_col0 - Group By Operator [GBY_231] (rows=316797606 width=88) + Group By Operator [GBY_230] (rows=316797606 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_33] PartitionCols:_col0, _col1 - Group By Operator [GBY_39] (rows=633595212 width=88) + Group By Operator [GBY_32] (rows=633595212 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_37] (rows=633595212 width=88) + Select Operator [SEL_30] (rows=633595212 width=88) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_183] (rows=633595212 width=88) - Conds:RS_230._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_182] (rows=633595212 width=88) + Conds:RS_229._col0=RS_190._col0(Inner),Output:["_col1","_col2","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_191] + PARTITION_ONLY_SHUFFLE [RS_190] PartitionCols:_col0 - Select Operator [SEL_188] (rows=73049 width=1119) + Select Operator [SEL_187] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_187] (rows=73049 width=1119) + Filter Operator [FIL_186] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_week_seq is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq","d_day_name"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + SHUFFLE [RS_229] PartitionCols:_col0 - Select Operator [SEL_229] (rows=575995635 width=88) + Select Operator [SEL_228] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_228] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_28] (rows=575995635 width=88) + Filter Operator [FIL_227] (rows=575995635 width=88) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_46_store_s_store_sk_min) AND DynamicValue(RS_46_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_46_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_21] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_219] - Group By Operator [GBY_218] (rows=1 width=12) + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_196] - Group By Operator [GBY_194] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_195] + Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_192] (rows=73049 width=1119) + Select Operator [SEL_191] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_188] + Please refer to the previous Select Operator [SEL_187] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_227] - Group By Operator [GBY_226] (rows=1 width=12) + BROADCAST [RS_226] + Group By Operator [GBY_225] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_224] + Group By Operator [GBY_223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=1704 width=1910) + Select Operator [SEL_222] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_221] + Please refer to the previous Select Operator [SEL_220] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col2, _col1 - Select Operator [SEL_27] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Merge Join Operator [MERGEJOIN_182] (rows=383325119 width=88) - Conds:RS_24._col1=RS_201._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] - PartitionCols:_col0 - Select Operator [SEL_200] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_199] (rows=1704 width=1910) - predicate:(s_store_id is not null and s_store_sk is not null) - TableScan [TS_18] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_181] (rows=348477374 width=88) - Conds:RS_211._col0=RS_216._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] - PartitionCols:_col1 - Select Operator [SEL_214] (rows=8116 width=1119) - Output:["_col1"] - Filter Operator [FIL_212] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) - Please refer to the previous TableScan [TS_15] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_211] - PartitionCols:_col0 - Group By Operator [GBY_210] (rows=316797606 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=633595212 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_9] (rows=633595212 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_180] (rows=633595212 width=88) - Conds:RS_209._col0=RS_189._col0(Inner),Output:["_col1","_col2","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_189] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_188] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0 - Select Operator [SEL_208] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_207] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_206] - Group By Operator [GBY_205] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_202] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_200] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_198] - Group By Operator [GBY_197] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] - Group By Operator [GBY_193] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_190] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_188] + SHUFFLE [RS_55] + PartitionCols:_col12, _col0 + Merge Join Operator [MERGEJOIN_181] (rows=383325119 width=88) + Conds:RS_52._col1=RS_200._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_200] + PartitionCols:_col0 + Select Operator [SEL_199] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_198] (rows=1704 width=1910) + predicate:(s_store_id is not null and s_store_sk is not null) + TableScan [TS_18] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_180] (rows=348477374 width=88) + Conds:RS_210._col0=RS_215._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_215] + PartitionCols:_col1 + Select Operator [SEL_213] (rows=8116 width=1119) + Output:["_col1"] + Filter Operator [FIL_211] (rows=8116 width=1119) + predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) + Please refer to the previous TableScan [TS_15] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_210] + PartitionCols:_col0 + Group By Operator [GBY_209] (rows=316797606 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=633595212 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 + Select Operator [SEL_9] (rows=633595212 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_179] (rows=633595212 width=88) + Conds:RS_208._col0=RS_188._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_188] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_187] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] + PartitionCols:_col0 + Select Operator [SEL_207] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_206] (rows=575995635 width=88) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_205] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_201] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_199] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_197] + Group By Operator [GBY_196] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + Group By Operator [GBY_192] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_189] (rows=73049 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_187] diff --git a/ql/src/test/results/clientpositive/perf/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out index 3a8ed092fb..86e892f786 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query95.q.out @@ -121,14 +121,14 @@ Stage-0 SHUFFLE [RS_46] PartitionCols:_col0 Group By Operator [GBY_45] (rows=174243235 width=135) - Output:["_col0"],keys:_col1 + Output:["_col0"],keys:_col14 Merge Join Operator [MERGEJOIN_227] (rows=174243235 width=135) - Conds:RS_41._col0=RS_255._col0(Inner),Output:["_col1"] + Conds:RS_41._col0=RS_255._col13(Inner),Output:["_col14"] <-Map 22 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_255] - PartitionCols:_col0 + PartitionCols:_col13 Select Operator [SEL_254] (rows=14398467 width=92) - Output:["_col0"] + Output:["_col13"] Filter Operator [FIL_253] (rows=14398467 width=92) predicate:wr_order_number is not null TableScan [TS_38] (rows=14398467 width=92) diff --git a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out index b9434b107e..ccecfede51 100644 --- a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out +++ b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out @@ -263,18 +263,18 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes_n0 c left outer join pokes_n0 b on c.foo=b.foo) c left outer join pokes_n0 d where d.foo=1 and c.bar=2 PREHOOK: type: QUERY POSTHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes_n0 c left outer join pokes_n0 b on c.foo=b.foo) c left outer join pokes_n0 d where d.foo=1 and c.bar=2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -318,20 +318,21 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: int) TableScan alias: d filterExpr: (foo = 1) (type: boolean) @@ -347,11 +348,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: int) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -359,10 +355,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2 + outputColumnNames: _col2, _col4 Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col2 (type: int), _col1 (type: int) + expressions: _col2 (type: int), _col4 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index c1f3401f42..dfda86fa91 100644 --- a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -29,82 +29,107 @@ JOIN ON a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-18 is a root stage , consists of Stage-27, Stage-28, Stage-2 - Stage-27 has a backup stage: Stage-2 - Stage-16 depends on stages: Stage-27 - Stage-8 depends on stages: Stage-2, Stage-13, Stage-14, Stage-16, Stage-17, Stage-22, Stage-23 , consists of Stage-26, Stage-3 - Stage-26 - Stage-7 depends on stages: Stage-26 - Stage-3 depends on stages: Stage-7 - Stage-28 has a backup stage: Stage-2 - Stage-17 depends on stages: Stage-28 - Stage-2 - Stage-21 is a root stage , consists of Stage-32, Stage-33, Stage-1 - Stage-32 has a backup stage: Stage-1 - Stage-19 depends on stages: Stage-32 - Stage-10 depends on stages: Stage-1, Stage-19, Stage-20 , consists of Stage-31, Stage-2 - Stage-31 - Stage-9 depends on stages: Stage-31 - Stage-15 depends on stages: Stage-9, Stage-11 , consists of Stage-29, Stage-30, Stage-2 - Stage-29 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-29 - Stage-30 has a backup stage: Stage-2 - Stage-14 depends on stages: Stage-30 - Stage-33 has a backup stage: Stage-1 - Stage-20 depends on stages: Stage-33 + Stage-18 is a root stage , consists of Stage-22, Stage-23, Stage-24, Stage-1 + Stage-22 has a backup stage: Stage-1 + Stage-15 depends on stages: Stage-22 + Stage-8 depends on stages: Stage-1, Stage-11, Stage-12, Stage-13, Stage-15, Stage-16, Stage-17 , consists of Stage-20, Stage-21, Stage-2 + Stage-20 + Stage-6 depends on stages: Stage-20 + Stage-2 depends on stages: Stage-6, Stage-7 + Stage-21 + Stage-7 depends on stages: Stage-21 + Stage-23 has a backup stage: Stage-1 + Stage-16 depends on stages: Stage-23 + Stage-24 has a backup stage: Stage-1 + Stage-17 depends on stages: Stage-24 Stage-1 - Stage-24 is a root stage , consists of Stage-34, Stage-35, Stage-2 - Stage-34 has a backup stage: Stage-2 - Stage-22 depends on stages: Stage-34 - Stage-35 has a backup stage: Stage-2 - Stage-23 depends on stages: Stage-35 - Stage-37 is a root stage - Stage-25 depends on stages: Stage-37 - Stage-12 depends on stages: Stage-25 , consists of Stage-36, Stage-2 - Stage-36 - Stage-11 depends on stages: Stage-36 - Stage-0 depends on stages: Stage-3 + Stage-29 is a root stage + Stage-19 depends on stages: Stage-29 + Stage-10 depends on stages: Stage-19 , consists of Stage-28, Stage-1 + Stage-28 + Stage-9 depends on stages: Stage-28 + Stage-14 depends on stages: Stage-9 , consists of Stage-25, Stage-26, Stage-27, Stage-1 + Stage-25 has a backup stage: Stage-1 + Stage-11 depends on stages: Stage-25 + Stage-26 has a backup stage: Stage-1 + Stage-12 depends on stages: Stage-26 + Stage-27 has a backup stage: Stage-1 + Stage-13 depends on stages: Stage-27 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-18 Conditional Operator - Stage: Stage-27 + Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $INTNAME + Fetch Operator + limit: -1 + $hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $INTNAME TableScan HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) - Stage: Stage-16 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -112,12 +137,15 @@ STAGE PLANS: Stage: Stage-8 Conditional Operator - Stage: Stage-26 + Stage: Stage-20 Map Reduce Local Work Alias -> Map Local Tables: 1 Fetch Operator limit: -1 + 2 + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: 1 TableScan @@ -125,17 +153,27 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + 2 + TableScan + HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) Group By Operator aggregations: count() mode: hash @@ -151,7 +189,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -174,12 +212,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-28 + Stage: Stage-21 + Map Reduce Local Work + Alias -> Map Local Tables: + 0 + Fetch Operator + limit: -1 + 2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + 0 + TableScan + HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + 2 + TableScan + HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 + $hdt$_0:src1 + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: $INTNAME TableScan @@ -187,6 +280,107 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-24 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + $hdt$_1:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Stage: Stage-17 Map Reduce @@ -195,10 +389,12 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -214,15 +410,41 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) @@ -233,11 +455,13 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -250,28 +474,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-21 - Conditional Operator - - Stage: Stage-32 + Stage: Stage-29 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src2 + $hdt$_2:$hdt$_3:t1_n94 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src2 + $hdt$_2:$hdt$_3:t1_n94 TableScan - alias: src2 + alias: t1_n94 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -281,7 +502,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 + alias: src filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -312,7 +533,7 @@ STAGE PLANS: Stage: Stage-10 Conditional Operator - Stage: Stage-31 + Stage: Stage-28 Map Reduce Local Work Alias -> Map Local Tables: 1 @@ -346,99 +567,29 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-15 + Stage: Stage-14 Conditional Operator - Stage: Stage-29 + Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $INTNAME + Fetch Operator + limit: -1 + $hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $INTNAME TableScan HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - - Stage: Stage-13 - Map Reduce - Map Operator Tree: + 2 _col0 (type: string) + $hdt$_1:src2 TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-30 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-33 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:src1 - TableScan - alias: src1 + alias: src2 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -452,12 +603,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) - Stage: Stage-20 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -470,24 +622,45 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-1 - Map Reduce - Map Operator Tree: + Stage: Stage-26 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + $hdt$_0:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $hdt$_0:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -499,11 +672,15 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -515,140 +692,60 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - handleSkewJoin: true - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-24 - Conditional Operator - - Stage: Stage-34 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-22 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-35 + Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $hdt$_0:src1 Fetch Operator limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-23 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-37 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:t1_n94 + $hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:t1_n94 + $hdt$_0:src1 TableScan - alias: t1_n94 + alias: src1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - - Stage: Stage-25 - Map Reduce - Map Operator Tree: + 2 _col0 (type: string) + $hdt$_1:src2 TableScan - alias: src + alias: src2 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -658,58 +755,37 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-12 - Conditional Operator + 2 _col0 (type: string) - Stage: Stage-36 - Map Reduce Local Work - Alias -> Map Local Tables: - 1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 1 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - - Stage: Stage-11 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 7366408ef9..e756cb16ec 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -46,9 +46,8 @@ POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' over POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3_n4 -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[18][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-4:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -56,39 +55,38 @@ POSTHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a + alias: c filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: d filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -96,7 +94,7 @@ STAGE PLANS: keys: 0 1 - Statistics: Num rows: 1 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4281 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -104,78 +102,58 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4141 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: c + alias: a filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE TableScan - alias: d + alias: b filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4281 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 - Statistics: Num rows: 1 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 2 + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -183,11 +161,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[40][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Stage-10:MAPRED' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-5:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -195,190 +170,55 @@ POSTHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-13 is a root stage - Stage-9 depends on stages: Stage-13 - Stage-8 depends on stages: Stage-9, Stage-10 , consists of Stage-11, Stage-12, Stage-2 - Stage-11 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-11 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-2 - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-13 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:b + $hdt$_0:a + Fetch Operator + limit: -1 + $hdt$_1:b + Fetch Operator + limit: -1 + $hdt$_2:$hdt$_2:c Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:b + $hdt$_0:a TableScan - alias: b + alias: a filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 1 - - Stage: Stage-9 - Map Reduce - Map Operator Tree: + 2 + $hdt$_1:b TableScan - alias: a + alias: b filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator keys: 0 1 - Statistics: Num rows: 1 Data size: 4141 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4141 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-14 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:c + 2 + $hdt$_2:$hdt$_2:c TableScan alias: c filterExpr: (key = 5) (type: boolean) @@ -393,7 +233,7 @@ STAGE PLANS: 0 1 - Stage: Stage-10 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -412,12 +252,26 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -428,11 +282,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[40][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Stage-10:MAPRED' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-5:MAPRED' is a cross product PREHOOK: query: select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY PREHOOK: Input: default@smb_bucket_1_n4 diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 00d5b6532a..18e75aa231 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -1646,15 +1646,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1673,50 +1672,52 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: a + alias: d filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: int) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1741,25 +1742,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1812,15 +1794,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1839,50 +1820,52 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: a + alias: d filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: int) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1907,25 +1890,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3395,6 +3359,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + 2 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3415,6 +3380,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + 2 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3439,34 +3405,27 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 2 Map 3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3512,6 +3471,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + 2 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3532,6 +3492,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + 2 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3556,34 +3517,27 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 2 Map 3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out index f372eefebb..abea0dac7b 100644 --- a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out @@ -703,8 +703,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -726,7 +725,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: t1_n127 @@ -745,7 +744,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: t3_n29 @@ -770,37 +769,24 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 33 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 33 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join28.q.out b/ql/src/test/results/clientpositive/spark/join28.q.out index 40d328c2bf..30b2b7548d 100644 --- a/ql/src/test/results/clientpositive/spark/join28.q.out +++ b/ql/src/test/results/clientpositive/spark/join28.q.out @@ -33,43 +33,45 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -78,7 +80,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -94,39 +96,28 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 + 2 _col0 (type: string) + outputColumnNames: _col1, _col3 input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - input vertices: - 0 Map 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n11 + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n11 Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 9c1e4361d8..a4d1447810 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -1053,17 +1053,16 @@ SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `t5`.`key`, `t0`.`value`, `t5`.`value` AS `value1` +OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t4`.`value` AS `value1` FROM (SELECT `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `t4`.`key`, `t4`.`value` -FROM (SELECT `key` +INNER JOIN ((SELECT `key` FROM `default`.`src` WHERE `key` IS NOT NULL) AS `t2` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) AS `t5` ON `t0`.`value` = `t5`.`value` +WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -1075,26 +1074,26 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 + 1 _col0 (type: string) + Position of Big Table: 0 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1103,36 +1102,33 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1140,36 +1136,38 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src1 [$hdt$_2:x] Map 3 Map Operator Tree: TableScan - alias: x - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Execution mode: vectorized @@ -1180,33 +1178,36 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1214,26 +1215,24 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [$hdt$_1:$hdt$_2:x] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -1256,61 +1255,57 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - input vertices: - 0 Map 1 - Position of Big Table: 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2_n1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j2_n1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2_n1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j2_n1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1366,7 +1361,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /src [$hdt$_1:y] Stage: Stage-0 Move Operator @@ -1530,16 +1525,15 @@ SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t4`.`value` AS `value1` +OPTIMIZED SQL: SELECT `t2`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `t3`.`key`, `t3`.`value` -FROM (SELECT `key` -FROM `default`.`src`) AS `t1` -RIGHT JOIN (SELECT `key`, `value` +INNER JOIN ((SELECT `key`, `value` FROM `default`.`src1` -WHERE `value` IS NOT NULL) AS `t3` ON `t1`.`key` = `t3`.`key`) AS `t4` ON `t0`.`value` = `t4`.`value` +WHERE `value` IS NOT NULL) AS `t2` +LEFT JOIN (SELECT `key` +FROM `default`.`src`) AS `t3` ON `t2`.`key` = `t3`.`key`) ON `t0`.`value` = `t2`.`value` STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -1551,26 +1545,21 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1579,27 +1568,23 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1609,6 +1594,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1616,33 +1602,40 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] - Map 2 + /src [$hdt$_2:y] + Map 3 Map Operator Tree: TableScan - alias: y + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 1 + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1651,23 +1644,27 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1677,7 +1674,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1685,26 +1681,24 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: x @@ -1721,67 +1715,63 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col1 input vertices: - 0 Map 2 - Position of Big Table: 1 + 1 Map 2 + Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - input vertices: - 0 Map 1 - Position of Big Table: 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2_n1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j2_n1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2_n1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2_n1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1837,7 +1827,7 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_1:$hdt$_2:x] + /src1 [$hdt$_1:x] Stage: Stage-0 Move Operator @@ -2024,23 +2014,23 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2048,18 +2038,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Execution mode: vectorized Local Work: @@ -2069,7 +2059,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -2090,34 +2080,30 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - input vertices: - 0 Map 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2275,42 +2261,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: y - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: x - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Execution mode: vectorized Local Work: @@ -2320,7 +2306,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -2341,34 +2327,30 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - input vertices: - 0 Map 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out index ab25791aa6..bdf7eb6eb2 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -22,43 +22,45 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -67,7 +69,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -83,38 +85,27 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 + 2 _col0 (type: string) + outputColumnNames: _col1, _col3 input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - input vertices: - 0 Map 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -278,43 +269,45 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -323,7 +316,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -339,38 +332,27 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 + 2 _col0 (type: string) + outputColumnNames: _col1, _col3 input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - input vertices: - 0 Map 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out index 112c1871e9..ccb9df22f7 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out @@ -105,6 +105,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) + 2 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -124,7 +125,8 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col1 (type: int) + 2 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -149,34 +151,27 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 - Statistics: Num rows: 1 Data size: 66 Basic stats: COMPLETE Column stats: NONE + 2 Map 3 + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: int), _col2 (type: string), _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col3 (type: int), _col2 (type: string), _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out index 57ab0ecb63..86d2534f48 100644 --- a/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out @@ -29,23 +29,46 @@ JOIN ON a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-10, Stage-9 - Stage-10 - Stage-6 depends on stages: Stage-10 - Stage-9 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-9 - Stage-4 depends on stages: Stage-5 , consists of Stage-8, Stage-2 - Stage-8 - Stage-3 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-3 + Stage-8 is a root stage + Stage-1 depends on stages: Stage-8 + Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-7, Stage-2 + Stage-6 + Stage-3 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-3, Stage-4 + Stage-7 + Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 STAGE PLANS: + Stage: Stage-8 + Spark +#### A masked pattern was here #### + Vertices: + Map 6 + Map Operator Tree: + TableScan + alias: t1_n94 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -67,7 +90,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -86,105 +109,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - handleSkewJoin: true - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-10 - Spark -#### A masked pattern was here #### - Vertices: - Map 13 - Map Operator Tree: - TableScan - Spark HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 12 - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-9 - Spark -#### A masked pattern was here #### - Vertices: - Map 7 - Map Operator Tree: - TableScan - alias: t1_n94 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-5 - Spark - Edges: - Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 11 - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan alias: src @@ -205,7 +130,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 7 + 1 Map 6 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -215,16 +140,18 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -237,10 +164,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Conditional Operator - Stage: Stage-8 + Stage: Stage-6 Spark #### A masked pattern was here #### Vertices: @@ -251,6 +178,17 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + Local Work: + Map Reduce Local Work + Map 9 + Map Operator Tree: + TableScan + Spark HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) Local Work: Map Reduce Local Work @@ -258,15 +196,17 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) Group By Operator aggregations: count() mode: hash @@ -285,10 +225,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 4 <- Map 8 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 8 + Map 7 Map Operator Tree: TableScan Reduce Output Operator @@ -296,7 +236,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -312,6 +252,61 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-7 + Spark +#### A masked pattern was here #### + Vertices: + Map 11 + Map Operator Tree: + TableScan + Spark HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + Local Work: + Map Reduce Local Work + Map 13 + Map Operator Tree: + TableScan + Spark HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + 2 reducesinkkey0 (type: string) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index 16fa81870c..d13bb6fde6 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -46,9 +46,8 @@ POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' over POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3_n4 -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[18][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 6' is a cross product -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_2, $hdt$_3]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -63,9 +62,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -83,7 +81,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: b @@ -98,7 +96,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -113,7 +111,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: d @@ -133,34 +131,24 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 - Statistics: Num rows: 1 Data size: 4141 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4141 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE + 2 + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int), 5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -179,9 +167,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[26][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -189,63 +176,50 @@ POSTHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 1 + 2 Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: a + alias: b filterExpr: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator keys: 0 1 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 4141 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + 2 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -295,19 +269,22 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 + 2 input vertices: 0 Map 1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE + 1 Map 2 + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int), 5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8423 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8422 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -322,9 +299,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[26][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY PREHOOK: Input: default@smb_bucket_1_n4 diff --git a/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out b/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out index 26dd1f059f..d012f30a53 100644 --- a/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out @@ -148,123 +148,104 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 43), Map 5 (PARTITION-LEVEL SORT, 43) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 43), Reducer 7 (PARTITION-LEVEL SORT, 43) - Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 13), Map 8 (PARTITION-LEVEL SORT, 13) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 55), Map 4 (PARTITION-LEVEL SORT, 55), Reducer 6 (PARTITION-LEVEL SORT, 55) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 43), Map 7 (PARTITION-LEVEL SORT, 43) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: src1 + alias: src2 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan - alias: src2 + alias: src1 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 400 Data size: 4265 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col2,_col1) (type: int) + expressions: hash(_col0,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 400 Data size: 4265 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) mode: hash @@ -274,7 +255,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -289,7 +270,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -297,13 +278,18 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 53 Data size: 561 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 53 Data size: 561 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index e2d51e85cf..76b3e504bb 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -1876,7 +1876,7 @@ POSTHOOK: Input: default@part_null_n0 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) @@ -1969,14 +1969,10 @@ STAGE PLANS: Filter Operator predicate: (_col5 >= _col9) (type: boolean) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + sort order: Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -1985,10 +1981,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 Statistics: Num rows: 9 Data size: 1193 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col7 <= _col9) (type: boolean) + predicate: (_col7 <= _col11) (type: boolean) Statistics: Num rows: 3 Data size: 397 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -2039,7 +2035,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4982,7 +4978,7 @@ POSTHOOK: Input: default@emps_n4 110 John 40 M Vancouver 2 NULL false true 2002-05-03 120 Wilma 20 F NULL 1 5 NULL true 2005-09-07 130 Alice 40 F Vancouver 2 NULL false true 2007-01-01 -Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from emps_n4 where deptno <> (select sum(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3) PREHOOK: type: QUERY POSTHOOK: query: explain select * from emps_n4 where deptno <> (select sum(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3) @@ -5075,14 +5071,10 @@ STAGE PLANS: Filter Operator predicate: (UDFToLong(_col2) <> _col10) (type: boolean) Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 261 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) Reducer 3 Reduce Operator Tree: Join Operator @@ -5091,10 +5083,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col12 Statistics: Num rows: 5 Data size: 306 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToLong(_col0) > _col10) (type: boolean) + predicate: (UDFToLong(_col0) > _col12) (type: boolean) Statistics: Num rows: 1 Data size: 61 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) diff --git a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 87983138aa..0365b206de 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -31,8 +31,8 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) + 0 _col0 (type: smallint) + 1 _col1 (type: smallint) Execution mode: vectorized Map Vectorization: enabled: true @@ -48,20 +48,20 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: v3 - filterExpr: csmallint is not null (type: boolean) + alias: v2 + filterExpr: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: csmallint is not null (type: boolean) + predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: csmallint (type: smallint) + expressions: ctinyint (type: tinyint) outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) + 0 _col1 (type: tinyint) + 1 _col0 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -84,49 +84,45 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: v2 - filterExpr: ctinyint is not null (type: boolean) + alias: v3 + filterExpr: csmallint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ctinyint is not null (type: boolean) + predicate: csmallint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint) + expressions: csmallint (type: smallint) outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) - outputColumnNames: _col2, _col3 + 0 _col0 (type: smallint) + 1 _col1 (type: smallint) + outputColumnNames: _col1, _col3 input vertices: 1 Map 3 Statistics: Num rows: 13516 Data size: 3198793 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: smallint), _col3 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 3198793 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 14867 Data size: 3518672 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 14867 Data size: 3518672 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + value expressions: _col0 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -175,4 +171,4 @@ POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -6.065190932486892E11 +6.06519093248863E11 diff --git a/ql/src/test/results/clientpositive/vector_outer_join6.q.out b/ql/src/test/results/clientpositive/vector_outer_join6.q.out index 3c24928ffa..add5fa175c 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join6.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -130,7 +130,7 @@ POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -{"optimizedSQL":"SELECT `t1`.`tj1rnum`, `t1`.`tj2rnum`, `t2`.`rnum` AS `rnumt3`\nFROM (SELECT `t0`.`rnum` AS `tj1rnum`, `t`.`rnum` AS `tj2rnum`, `t`.`c1` AS `tj2c1`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t`\nRIGHT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`) AS `t1`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin3`) AS `t2` ON `t1`.`tj2c1` = `t2`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_0:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_0:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1","_col2":"1:_col0"},"condition map:":[{"":"Right Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col2 (type: int), _col0 (type: int), _col1 (type: int)","columnExprMap:":{"_col0":"_col2","_col1":"_col0","_col2":"_col1"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 0, 1]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1","_col3":"1:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 2:int","col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col1 (type: int), _col3 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col1","_col2":"_col3"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_27","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_28"}}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_29"}}}}}} +{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `tj2rnum`, `t1`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0","_col4":"2:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2","_col4"],"Statistics:":"Num rows: 8 Data size: 818 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col4 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2","_col2":"_col4"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 8 Data size: 818 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 8 Data size: 818 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -157,7 +157,7 @@ POSTHOOK: query: explain vectorization detail formatted select tj1rnum, tj2rnum as rnumt3 from (select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 POSTHOOK: type: QUERY -{"optimizedSQL":"SELECT `t1`.`tj1rnum`, `t1`.`tj2rnum` AS `rnumt3`\nFROM (SELECT `t0`.`rnum` AS `tj1rnum`, `t`.`rnum` AS `tj2rnum`, `t`.`c1` AS `tj2c1`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t`\nRIGHT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`) AS `t1`\nLEFT JOIN (SELECT `c1`\nFROM `default`.`tjoin3`) AS `t2` ON `t1`.`tj2c1` = `t2`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_0:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_0:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"c1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1","_col2":"1:_col0"},"condition map:":[{"":"Right Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col2 (type: int), _col0 (type: int), _col1 (type: int)","columnExprMap:":{"_col0":"_col2","_col1":"_col0","_col2":"_col1"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 0, 1]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 2:int","col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_27"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_28"}}}}}} +{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"c1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 8 Data size: 818 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 8 Data size: 818 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 8 Data size: 818 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY